Collie MLOps Framework Documentation
Welcome to Collie
Collie is a lightweight MLOps framework that provides a modular, event-driven architecture for building machine learning pipelines with deep MLflow integration.
Key Features:
- Modular Components: Transform, Train, Tune, Evaluate, and Push
- Event-Driven: Flexible workflow orchestration
- MLflow Integration: First-class MLflow support for tracking and model management
- Lightweight: Simple setup, no complex dependencies
- Extensible: Easy to add custom components
Quick Start
Installation
pip install collie-mlops
Basic Usage
from collie.core import (
Transformer,
Trainer,
Orchestrator,
TransformerPayload,
TrainerPayload
)
from collie import Event
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
# Define your Transformer
class IrisTransformer(Transformer):
    """Load the Iris dataset and hand it to the pipeline as training data."""

    def handle(self, event: Event) -> Event:
        """Build the event that carries the Iris training frame.

        Args:
            event: Incoming pipeline event; this step does not read it.

        Returns:
            An ``Event`` whose ``TransformerPayload`` holds the feature columns
            plus a ``"target"`` column as ``train_data``, no validation or test
            data, and dataset metadata in ``extra_data``.
        """
        data = load_iris()
        X = pd.DataFrame(data.data, columns=data.feature_names)
        y = pd.DataFrame(data.target, columns=["target"])
        # Features and label travel together in a single frame.
        train_data = pd.concat([X, y], axis=1)

        return Event(
            payload=TransformerPayload(
                train_data=train_data,
                validation_data=None,
                test_data=None,
                # Optional: pass custom data
                extra_data={
                    # load_iris() exposes feature_names as a plain list, which
                    # has no .tolist(); list() copies a list or any other
                    # sequence without raising AttributeError.
                    "feature_names": list(data.feature_names),
                    "n_classes": len(data.target_names),
                },
            )
        )
# Define your Trainer
class IrisTrainer(Trainer):
    """Fit a random forest on the training frame carried by the incoming event."""

    def handle(self, event: Event) -> Event:
        """Train the classifier, log results to MLflow, and emit the model.

        Args:
            event: Event whose payload provides ``train_data`` (a frame with a
                ``"target"`` column) and an ``extra_data`` mapping.

        Returns:
            An ``Event`` wrapping a ``TrainerPayload`` with the fitted model.
        """
        frame = event.payload.train_data
        features = frame.drop(columns="target")
        labels = frame["target"]

        # Access extra data if available
        names = event.payload.extra_data.get("feature_names", [])

        classifier = RandomForestClassifier()
        classifier.fit(features, labels)

        # Log metrics to MLflow. The score is computed on the same data the
        # model was fitted on, so it is a training accuracy.
        self.mlflow.log_metric("accuracy", classifier.score(features, labels))
        if names:
            self.mlflow.log_param("n_features", len(names))

        trained = TrainerPayload(model=classifier)
        return Event(payload=trained)
# Run the pipeline
# Settings forwarded to the Orchestrator alongside the component list.
# NOTE(review): tracking_uri presumably points at a locally running MLflow
# tracking server — confirm before running.
mlflow_settings = {
    "tracking_uri": "http://localhost:5000",
    "experiment_name": "iris_experiment",
    "registered_model_name": "iris_model",
}
orchestrator = Orchestrator(
    components=[IrisTransformer(), IrisTrainer()],
    **mlflow_settings,
)
orchestrator.run()