Let's start with a simple case running on a virtual machine, then develop up to a distributed cloud-based model:
Build and train a model on a virtual machine such as Amazon Elastic Compute Cloud (EC2).
Scale up the trained models using serverless services such as AWS Lambda or Heroku.
Set up API Gateway to control access to the service.
Both AWS and GCP provide Virtual Machine cloud services. In this section, we use Amazon EC2.
When creating a virtual machine (an EC2 instance), create a key pair (a private key and a public key; a set of security credentials used to prove your identity when connecting to an instance), attach it to the new EC2 instance, and download the private key "*.pem" to access the server via SSH.

There are two IPs:
Public IP (for SSH, or accessing Jupyter externally): 54.200.59.160
Private IP: 172.31.25.7 - the internal IP of this virtual machine. When deploying services on this server, we configure them with this IP.
To log in, use ssh:
$ ssh -i ".ssh\Scalable-Model-Pipelines-Key-Pair-Name.pem" ec2-user@ec2-54-200-59-160.us-west-2.compute.amazonaws.com
Run Jupyter as a background service if you want to work in a Jupyter notebook:
$ nohup jupyter notebook --ip 172.31.25.7 --notebook-dir ~/codes/ &
To access Jupyter, we need to create an "Inbound Rule" from the Amazon EC2 management console (choose the current security group > Actions > Edit Inbound Rules > add a new rule for port 8888, used by Jupyter).
After starting Jupyter, it will show a URL with a private token like this:
http://172.31.25.7:8888/?token=02d---------------------------b1
Change this private IP to the public IP to open it in a local browser:
http://54.200.59.160:8888/?token=02d---------------------------b1
Note: activate the conda environment before running Jupyter.
There are a couple of additional tools needed; these libraries can be installed via pip3 or conda.
I use the dataset from https://www.kaggle.com/competitions/store-sales-time-series-forecasting/data?select=train.csv
For uploading, use scp. In my case, I simplified the dataset due to the limited resources of my EC2 instance.
$ scp -i C:\Users\kim_l\.ssh\Scalable-Model-Pipelines-Key-Pair-Name.pem .\train.csv ec2-user@ec2-54-200-59-160.us-west-2.compute.amazonaws.com:~/db/
There are thousands of open datasets on Kaggle, which are convenient to download and play with.
$ pip3 install kaggle
$ kaggle datasets download DB
$ unzip FILE.zip
$ chmod 0600 *.csv
Depending on what algorithms we want, we can train the models accordingly. For the purpose of this demo, I train a simple model with light tuning.
In reality, if we recommend items to a user from millions of candidates, it is not possible to use only one model to predict accurately. The more complex the model, the more time it takes to predict. Instead, multiple layers of models (like a funnel) are involved in the prediction process. The top layer filters out the most irrelevant items/users, and might use fast ML algorithms like Linear Regression, or even a heuristic filter with no ML involved. At the bottom layer, only tens or hundreds of items are fed to the ranking model, which is typically a neural network with very high accuracy.
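To make the funnel idea concrete, here is a minimal illustrative sketch: a cheap non-ML filter prunes millions of candidates down to a thousand, and only the survivors reach an "expensive" ranker (here a dummy scoring function standing in for a neural network; all names and numbers are invented for illustration).

```python
import numpy as np

rng = np.random.default_rng(0)
n_items = 100_000
popularity = rng.random(n_items)  # cheap, precomputed relevance signal

# Top layer: heuristic filter with no ML - keep the 1,000 most popular items.
candidates = np.argsort(popularity)[-1000:]

# Bottom layer: score only the surviving candidates.
def rank(items):
    # dummy scoring; a real system would run the heavy ranking model here
    return popularity[items] + rng.random(len(items))

scores = rank(candidates)
top10 = candidates[np.argsort(scores)[-10:]][::-1]
```

The expensive model is called on 1,000 items instead of 100,000, which is the whole point of the funnel.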

In this project, I will not focus on building this funnel-based model; I would be happy to explore it in another project.
Instead, I build a simple model with sklearn that supports seasonality, to demo the whole process of building scalable models.
import pandas as pd
from datetime import datetime, date
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess
import matplotlib.pyplot as plt
##################### Prepare and Process Data #########################
# https://www.kaggle.com/competitions/store-sales-time-series-forecasting/data?select=train.csv
db_file = 'db/store-sales-time-series-forecasting.csv'
train_data = pd.read_csv(db_file, parse_dates=['date'], header=0)
train_data['date'] = train_data.date.dt.to_period("D")
train_data = train_data.astype({'store_nbr': 'category', 'family': 'category', 'sales': 'float32'})
train_data.drop("onpromotion", axis=1, inplace=True)
train_data = train_data.set_index(['store_nbr', 'family', 'date']).sort_index()
# Training Range
range_begin = "2016-08-01"
range_end = "2017-08-01"
# Create Labels data (y output)
y = train_data.unstack(['store_nbr', 'family']).loc[range_begin:range_end]
##################### Creating Training Data #########################
fourier = CalendarFourier(freq='M', order=30)
dp = DeterministicProcess(
    index=y.index,
    order=1,
    seasonal=True,
    additional_terms=[fourier],
    period=7,
)
X = dp.in_sample()
X['NewYear'] = (X.index.dayofyear == 1) # X.loc["2016-12-31":"2017-01-02"]
##################### Model Fitting ###########################
model = LinearRegression(fit_intercept=False)
model.fit(X, y)
##################### Model Prediction #########################
X_test = dp.out_of_sample(steps=15) # Creating Testing Features
X_test['NewYear'] = (X_test.index.dayofyear == 1)
X_test.index.name = 'date'
y_pred = pd.DataFrame(model.predict(X_test), index=X_test.index, columns=y.columns)
##################### Visualizing ################################
STORE_NBR = 1 # 1 - 54
FAMILY = 'PRODUCE' # display(store_sales.index.get_level_values('family').unique())
fig, ax = plt.subplots(figsize=(13,6))
train_data.unstack(['store_nbr', 'family']).loc["2017-05-01":"2017-08-15"].loc(axis=1)['sales', STORE_NBR, FAMILY].plot(ax=ax)
y_pred.loc(axis=1)['sales', STORE_NBR, FAMILY].plot(ax=ax)
ax.set_title(f'{FAMILY} Sales at Store {STORE_NBR}');
ax.set(xlim=[date(2017, 6, 1),date(2017, 8, 15)] );
######## Save the model ###########
import mlflow.sklearn
model_path = "models/sale_forecasting_sklearn_v1"
mlflow.sklearn.save_model(model, model_path)
######### Save the Seasonality (or DeterministicProcess)
from pickle import dump
# save the deterministic process and the output column index
dump(dp, open('models/dp_v1.pkl', 'wb'))
dump(y.columns, open('models/y_columns_v1.pkl', 'wb'))
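As a sanity check before serving, saved artifacts can be reloaded and compared against the in-memory model. A minimal sketch of the same pickle round-trip pattern, using a toy LinearRegression and an invented file name (`model_check.pkl`):

```python
from pickle import dump, load

import numpy as np
from sklearn.linear_model import LinearRegression

# Toy data standing in for the real features and labels.
X = np.arange(20, dtype=float).reshape(-1, 2)
y = X @ np.array([1.5, -0.5])

model = LinearRegression(fit_intercept=False).fit(X, y)

# Round-trip through pickle, the same pattern used for dp_v1.pkl above.
with open("model_check.pkl", "wb") as f:
    dump(model, f)
with open("model_check.pkl", "rb") as f:
    restored = load(f)

# The reloaded model must reproduce the original predictions.
assert np.allclose(model.predict(X), restored.predict(X))
```

The same check applies to the dp and y.columns objects: reload them and confirm out_of_sample features and column labels match what the training run produced.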
Run this echo.py to serve the trained model as a Web Service Endpoint.
from pickle import load
import flask
import mlflow.sklearn
import pandas as pd
# Configure and load saved model + data
MODEL_PATH = "models/sale_forecasting_sklearn_v1"
DP_PATH = "models/dp_v1.pkl"
Y_COLUMN_PATH = "models/y_columns_v1.pkl"
model = mlflow.sklearn.load_model(MODEL_PATH)
dp = load(open(DP_PATH, "rb"))
y_column = load(open(Y_COLUMN_PATH, "rb"))
"""Start web framework"""
app = flask.Flask(__name__)
@app.route("/", methods=["GET", "POST"])
def predict():
    """Serve the model as a Web Service Endpoint."""
    global model, dp, y_column
    # If the model is no longer cached, reload it
    if not model or not dp or not len(y_column):
        model = mlflow.sklearn.load_model(MODEL_PATH)
        dp = load(open(DP_PATH, "rb"))
        y_column = load(open(Y_COLUMN_PATH, "rb"))
    data = {"success": False}
    # Get request parameters
    params = flask.request.get_json(silent=True)
    if params is None:
        params = flask.request.args
    # If a range of dates is given, do the prediction
    if "range" in params.keys():
        # Create the testing features
        X_pred = dp.out_of_sample(steps=int(params["range"]))
        X_pred["NewYear"] = X_pred.index.dayofyear == 1
        X_pred.index.name = "date"
        y = pd.DataFrame(
            model.predict(X_pred), index=X_pred.index, columns=y_column
        )
        data["prediction"] = (
            y.stack(["store_nbr", "family"]).reset_index().to_json()
        )
        data["success"] = True
    return flask.jsonify(data)

if __name__ == "__main__":
    app.run(host="0.0.0.0")
Type one of these terminal commands to start the service:
# with Flask built-in web server
$ python echo.py
# with Gunicorn
$ gunicorn --bind 0.0.0.0 echo:app
# on Windows, we can use waitress
$ waitress-serve --listen=127.0.0.1:5000 echo:app
Serverless functions are an excellent tool for rapidly moving from prototype to production for our predictive models. With serverless function environments, we write a function that the runtime supports, specify a list of dependencies, and then deploy the function to production. The cloud platform is responsible for provisioning servers, scaling up more machines to match demand, managing load balancers, and handling versioning.
Google Cloud Platform (GCP) provides an environment for serverless functions called Cloud Functions.
AWS Lambda does not support requirements.txt. We need to install dependencies manually, appending -t . to the pip command so that the libraries are installed into the current directory:
pip3 install pandas --no-deps -t .
The main benefit of this new paradigm is that developers can write code in a staging environment and then push code to production with minimal concerns about operational overhead. Serverless functions are a great approach for demonstrating the ability to serve models at scale.
The top concern is latency because it can impact customer experiences. Serverless ecosystems are restricted to specific runtimes, often have memory limitations that make it challenging to use deep learning frameworks, and are cloud-specific.
The Lambda function source code could follow this template (lambda_function.py in the top directory):
"""import LIBRARIES"""
from pickle import load
"""LOAD SAVED INFO"""
def lambda_handler(event, context):
global model
if not model:
"""LOAD SAVED TRAINED MODEL"""
data = {"success": False}
if "body" in event:
event = event["body"]
if event is not None:
event = json.loads(event)
else:
event = {}
if "range" in event:
range = int(event["range"])
"""FEATURE ENGINEERING"""
X_pred = XXX
data["prediction"] = str(model.predict(X_pred))
data["success"] = True
return data
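Before zipping and uploading, the handler logic can be exercised locally with a fake API Gateway event. A self-contained sketch with a dummy model standing in for the saved one (DummyModel and its predict signature are invented for the test):

```python
import json

class DummyModel:
    """Stand-in for the saved trained model."""
    def predict(self, steps):
        return [0.0] * steps

model = DummyModel()

def lambda_handler(event, context):
    data = {"success": False}
    # API Gateway wraps the JSON payload in a "body" string.
    if "body" in event:
        event = json.loads(event["body"]) if event["body"] else {}
    if "range" in event:
        steps = int(event["range"])
        data["prediction"] = str(model.predict(steps))
        data["success"] = True
    return data

# Simulate an API Gateway proxy invocation.
result = lambda_handler({"body": json.dumps({"range": "3"})}, None)
print(result)  # {'success': True, 'prediction': '[0.0, 0.0, 0.0]'}
```

Testing this way catches event-parsing mistakes before a slow upload-and-redeploy cycle.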
$ zip -r data.zip .
$ aws s3 cp data.zip s3://the_S3_bucket/data.zip
From the Lambda function page, choose "Upload from" and select "Amazon S3 location", then paste the link to the zip file.
We can increase RAM, storage, and timeout under Configuration > General configuration.
To let other services interact with the function, we need to define an API Gateway for the Lambda. Select "Create a new API" and use the "REST API" type.
import requests
result = requests.post(
"https://LAMBDA-GATEWAY-ADDRESS.amazonaws.com/default/echo",
json={"range": "15"},
)
print(result.status_code, result.content)
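The prediction field in the response is a DataFrame serialized with to_json(); the client can rebuild it with pandas. A rough sketch, where the payload below stands in for an actual service response:

```python
import io

import pandas as pd

# Stand-in for the "prediction" field returned by the service.
df = pd.DataFrame({
    "date": ["2017-08-16", "2017-08-17"],
    "store_nbr": [1, 1],
    "family": ["PRODUCE", "PRODUCE"],
    "sales": [123.4, 130.9],
})
payload = df.to_json()

# Client-side reconstruction of the forecast table.
restored = pd.read_json(io.StringIO(payload))
print(restored.shape)  # (2, 4)
```

Wrapping the string in StringIO avoids the deprecation warning newer pandas versions emit for literal JSON strings.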
Besides AWS Lambda, we can host the service in the cloud using Heroku, a cloud environment with great support for Python. Heroku supports three main deployment methods:
Heroku Git, using the Heroku CLI:
# log in to your Heroku
$ heroku login
# Clone the repository
$ heroku git:clone -a scalable-model-piplines
$ cd scalable-model-piplines
# Deploy your changes
$ git add .
$ git commit -am "make it better"
$ git push heroku master
GitHub, by connecting to a GitHub project.
Container Registry, by running a Docker-based container.
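Whichever Git-based method is used, Heroku reads a Procfile in the project root to know how to start the web process. Assuming the Flask service above is named echo.py and gunicorn is listed in requirements.txt, it could be a single line:

```
web: gunicorn echo:app
```

Heroku injects a PORT environment variable, which gunicorn's Heroku buildpack configuration picks up automatically.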
Heroku was free until November 28, 2022, but this environment remains low cost and may be suitable for short-term development.