# Source code for mllaunchpad.model_interface

# Stdlib imports
import abc
import logging


# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)


class ModelMakerInterface(abc.ABC):
    """Abstract model factory interface for Data-Scientist-created models.

    Please inherit from this class and put your training code into the
    method "create_trained_model". This method will be called by the
    framework when your model needs to be (re)trained.

    Why not simply use static methods?
        We want to make it possible for create_trained_model to pass extra
        info to test_trained_model without extending the latter with
        optional keyword arguments that might be confusing for the 90% of
        cases where they are not needed. So we rely on the smarts of the
        person inheriting from this class to find a solution/shortcuts if
        they want to do more difficult things, e.g. want to do the
        train/test split themselves.
    """

    @abc.abstractmethod
    def create_trained_model(
        self, model_conf, data_sources, data_sinks, old_model=None
    ):
        """Implement this method, including data prep/feature creation.

        No need to test your model here. Put testing code in
        test_trained_model, which will be called automatically after
        training.
        (Feel free to put common code for preparing data into another
        function, class, library, ...)

        Params:
            model_conf:   the model configuration dict from the config file
            data_sources: dict containing the data sources (this includes
                          your train/validation/test data), as configured
                          in the config file
            data_sinks:   dict containing the data sinks, as configured in
                          the config file. Usually unused when training.
            old_model:    contains an old model, if it exists, which can be
                          used for incremental training. default: None

        Return:
            The trained model/data/anything which you want to use in the
            predict() function. (Usually simply a fitted model object, but
            can be anything, like a dict of several models, a model with
            some extra info, etc.)
            Whatever you return here gets automatically stuffed into your
            ModelInterface-inherited object and is accessible there using
            predict's model parameter (or the self.contents attribute).
        """

    @abc.abstractmethod
    def test_trained_model(self, model_conf, data_sources, data_sinks, model):
        """Implement this method, including data prep/feature creation.

        This method will be called to re-test a model, e.g. to check
        whether it has to be re-trained.
        (Feel free to put common code for preparing data into another
        function, class, library, ...)

        Params:
            model_conf:   the model configuration dict from the config file
            data_sources: dict containing the data sources (this includes
                          your train/validation/test data), as configured
                          in the config file
            data_sinks:   dict containing the data sinks, as configured in
                          the config file. Usually unused when testing.
            model:        your model object (whatever you returned in
                          create_trained_model)

        Return:
            Return a dict of metrics (like 'accuracy', 'f1',
            'confusion_matrix', etc.)
        """
class ModelInterface(abc.ABC):
    """Abstract model interface for Data-Scientist-created models.

    Please inherit from this class when creating your model to make it
    usable for ModelApi.

    You don't need to create this object yourself when training. It is
    created automatically and the model/info returned from
    create_trained_model is made accessible to you through the
    self.contents attribute.
    """

    def __init__(self, contents=None):
        """Store the object needed for prediction on the instance.

        If you override __init__, please call super().__init__(...) at the
        beginning. Otherwise, you need to assign self.contents to the
        contents parameter manually in __init__.

        Params:
            contents: any object that is needed for prediction (usually a
                      trained classifier or predictor). It is passed to
                      the predict() method as the "model" parameter for
                      convenience.
        """
        self.contents = contents
        # Starts out False for every new instance; nothing in this class
        # changes it afterwards.
        self.have_columns_been_ordered = False

    @abc.abstractmethod
    def predict(self, model_conf, data_sources, data_sinks, model, args_dict):
        """Implement this method, including data prep/feature creation
        based on args_dict.

        args_dict can also contain an id which the model can use to fetch
        data from any data_sources.
        (Feel free to put common code for preparing data into another
        function, class, library, ...)

        Params:
            model_conf:   the model configuration dict from the config file
            data_sources: dict containing the data sources
            data_sinks:   dict containing the data sinks, as configured in
                          the config file.
            model:        your model object (whatever you returned in
                          create_trained_model)
            args_dict:    parameters the API was called with, dict of
                          strings (any type conversion needs to be done by
                          you)

        Return:
            Prediction result as a dictionary/list structure which will be
            automatically turned into JSON.
        """

    def __del__(self):
        """Clean up any resources (temporary files, sockets, etc.).

        If you override this method, please call super().__del__() at the
        beginning.
        """