Getting Started
Quick Start
Get from zero to a working feature query in under five minutes.
Prerequisites
- Thyme SDK installed (`pip install thyme-sdk`)
- Connected to your hosted Thyme instance via `thyme login` (see Installation)
1. Define your features
Create features.py:
from datetime import datetime
from thyme.dataset import dataset, field
from thyme.pipeline import pipeline, inputs, Avg, Count
from thyme.featureset import featureset, feature, extractor
from thyme.featureset import extractor_inputs, extractor_outputs
from thyme.connectors import IcebergSource, source
# Raw event stream: transaction rows read from the Iceberg table configured
# in the @source decorator (catalog "prod", database "events", table
# "transactions").
@source(
    IcebergSource(catalog="prod", database="events", table="transactions"),
    cursor="ts",
    every="1m",
    max_lateness="5m",
    cdc="append",
)
@dataset(index=True)
class Transaction:
    # Key field; the UserStats pipeline groups by this column.
    user_id: str = field(key=True)
    # Value averaged by the UserStats pipeline.
    amount: float
    # Timestamp field; the same column is named as the source cursor above.
    ts: datetime = field(timestamp=True)
# Aggregated stats (computed by the engine)
@dataset(index=True)
class UserStats:
    user_id: str = field(key=True)
    ts: datetime = field(timestamp=True)
    # Filled by the `compute` pipeline below; the field names match the
    # keyword names passed to `.aggregate(...)`.
    avg_amount_7d: float
    txn_count_30d: int

    # Derives this dataset from Transaction: group by user_id, then take the
    # average of `amount` over a "7d" window and a count over a "30d" window.
    @pipeline(version=1)
    @inputs(Transaction)
    def compute(cls, t: Transaction):
        return (
            t.groupby("user_id")
            .aggregate(
                avg_amount_7d=Avg(of="amount", window="7d"),
                txn_count_30d=Count(of="user_id", window="30d"),
            )
        )
# Feature set exposed to models
@featureset
class UserFeatures:
uid: str = feature(id=1)
avg_spend_7d: float = feature(id=2)
txn_count_30d: int = feature(id=3)
@extractor
@extractor_inputs("uid")
@extractor_outputs("avg_spend_7d", "txn_count_30d")
def from_stats(cls, ts, inputs):
uid = inputs["uid"]
row = UserStats.lookup(ts, user_id=uid)
return row["avg_amount_7d"], row["txn_count_30d"]2. Commit to your Thyme instance
thyme commit features.py
# Committed 2 dataset(s), 1 pipeline(s), 1 featureset(s), 1 source(s)

3. Query features
Use the CLI (no Python needed):
thyme query features:UserFeatures -e user_42

┏━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ uid      ┃ avg_spend_7d  ┃ txn_count_30d ┃
┡━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ user_42  │ 47.32         │ 18            │
└──────────┴───────────────┴───────────────┘
Query run: 7b3e4c...
Results: $THYME_FRONTEND_URL/query-runs/7b3e4c...

Or via HTTP if you prefer:
curl -H "Authorization: Bearer $THYME_API_KEY" \
"$THYME_BASE_URL/features?featureset=UserFeatures&entity_id=user_42"

{
"entity_type": "UserFeatures",
"entity_id": "user_42",
"features": {
"avg_spend_7d": 47.32,
"txn_count_30d": 18
},
"mode": "featureset"
}

When `THYME_FRONTEND_URL` is set in your environment, the CLI prints a clickable `Results:` link to the Query Run page in the UI, where you can see latency and hit rate and replay the query.
Next steps
See First Feature for a detailed walkthrough of each concept, Query Runs for the audit trail, or jump to Concepts to understand the building blocks.