Install the dependencies in your local environment.
pip install -r requirements.txt
train_model.py
import sys
import joblib
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
if __name__ == "__main__":
    # The embedding dimensionality is passed as the first CLI argument;
    # fail with a clear usage message instead of an IndexError.
    if len(sys.argv) < 2:
        sys.exit("Usage: train_model.py <no_components>")
    no_components = int(sys.argv[1])
    print(f"Number of components is set to {no_components}")

    # Load the MovieLens 100k dataset. Only five-star
    # ratings are treated as positive interactions.
    data = fetch_movielens(min_rating=5.0)

    # Instantiate and train the model with the WARP ranking loss.
    model = LightFM(no_components=no_components, loss='warp')
    model.fit(data['train'], epochs=30, num_threads=2)

    # Persist the trained model so the serving code can load it.
    joblib.dump(model, "model.joblib")
src/func_main.py
import joblib
import numpy as np
from lightfm import LightFM
# Load the serialized LightFM model once at import time so every request
# reuses the same in-memory model (path presumably matches the serving
# container's layout — confirm against the deployment config).
model: LightFM = joblib.load("/model/files/model.joblib")
# Candidate item ids 0..1681 — presumably the full MovieLens 100k
# item set the model was trained on; verify against training data.
item_ids = np.arange(0, 1682)
def get_top_rank_item(user_id, top_k=3):
    """Return the ``top_k`` highest-scored item ids for ``user_id``.

    Scores every item in the module-level ``item_ids`` with the
    module-level ``model`` and returns a dict of the form
    ``{'top_1': ..., 'top_2': ..., 'top_3': ...}`` (keys up to ``top_k``),
    ordered from best to worst. ``top_k`` defaults to 3 so existing
    callers keep their previous behavior.
    """
    # Predicted relevance score for every candidate item.
    scores = model.predict(user_ids=[user_id], item_ids=item_ids)
    # Indices of the top_k scores, highest first.
    top_items = scores.argsort()[::-1][:top_k]
    return {f"top_{rank + 1}": item_id for rank, item_id in enumerate(top_items)}
We'll simulate production data flow by repeatedly asking our model for recommendations.
import numpy as np
from hydrosdk import Cluster, Application
from tqdm.auto import tqdm
# Connect to the Hydrosphere cluster and look up the deployed A/B application.
cluster = Cluster("http://localhost", grpc_address="localhost:9090")
app = Application.find(cluster, "movie-ab-app")
predictor = app.predictor()

# User ids 0..942 — presumably the MovieLens 100k user set; confirm
# against the training data.
user_ids = np.arange(0, 943)

# Sample 2000 users with replacement and request a recommendation for
# each one to simulate production traffic against the application.
for uid in tqdm(np.random.choice(user_ids, 2000, replace=True)):
    result = predictor.predict({"user_id": uid})
To create an A/B deployment we need to create an application with a single execution stage consisting of two model variants. These model variants are our model A and model B, correspondingly.