Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a specific error for missing columns during materialization #1619

Merged
merged 5 commits into from
Jun 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions sdk/python/feast/errors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Set

from colorama import Fore, Style


Expand Down Expand Up @@ -75,3 +77,13 @@ def __init__(self, expected, missing):
f"The entity dataframe you have provided must contain columns {expected}, "
f"but {missing} were missing."
)


class FeastJoinKeysDuringMaterialization(Exception):
    """Signal that a source being materialized lacks required join key columns.

    The exception message names the source, lists the join key columns that
    were required, and lists the subset of those that the source did not have.

    Args:
        source: Identifier of the data source, interpolated into the message.
        join_key_columns: Column names that must be present in the source.
        source_columns: Column names actually present in the source.
    """

    def __init__(
        self, source: str, join_key_columns: Set[str], source_columns: Set[str]
    ):
        # Set difference: required columns that the source does not provide.
        missing_columns = join_key_columns - source_columns
        message = (
            f"The DataFrame from {source} being materialized must have at least {join_key_columns} columns present, "
            f"but these were missing: {missing_columns} "
        )
        super().__init__(message)
6 changes: 3 additions & 3 deletions sdk/python/feast/infra/offline_stores/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytz

from feast.data_source import DataSource, FileSource
from feast.errors import FeastJoinKeysDuringMaterialization
from feast.feature_view import FeatureView
from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob
from feast.infra.provider import (
Expand Down Expand Up @@ -218,9 +219,8 @@ def pull_latest_from_table_or_query(

source_columns = set(source_df.columns)
if not set(join_key_columns).issubset(source_columns):
raise ValueError(
f"The DataFrame must have at least {set(join_key_columns)} columns present, "
f"but these were missing: {set(join_key_columns)- source_columns} "
raise FeastJoinKeysDuringMaterialization(
data_source.path, set(join_key_columns), source_columns
)

ts_columns = (
Expand Down
34 changes: 34 additions & 0 deletions sdk/python/tests/example_feature_repo_with_entity_join_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from google.protobuf.duration_pb2 import Duration

from feast import Entity, Feature, FeatureView, ValueType
from feast.data_source import FileSource

# File-backed source for the driver statistics used by this example repo.
driver_hourly_stats = FileSource(
    path="%PARQUET_PATH%",  # placeholder to be replaced by the test
    created_timestamp_column="created",
    event_timestamp_column="datetime",
)


# The entity's join key ("driver") is intentionally not the same as its name
# ("driver_id"): it is meant to differ from the parquet file's columns so that
# the failure path is exercised.
driver = Entity(
    name="driver_id",
    join_key="driver",
    value_type=ValueType.INT64,
    description="driver id",
)


# Feature view over the file source above, keyed by the "driver_id" entity.
driver_hourly_stats_view = FeatureView(
    name="driver_hourly_stats",
    entities=["driver_id"],
    ttl=Duration(seconds=86400),  # one day
    features=[
        Feature(name=feature_name, dtype=feature_dtype)
        for feature_name, feature_dtype in (
            ("conv_rate", ValueType.FLOAT),
            ("acc_rate", ValueType.FLOAT),
            ("avg_daily_trips", ValueType.INT64),
        )
    ],
    input=driver_hourly_stats,
    online=True,
    tags=dict(),
)
23 changes: 23 additions & 0 deletions sdk/python/tests/test_e2e_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,26 @@ def test_e2e_local() -> None:
assert r.returncode == 0

_assert_online_features(store, driver_df, end_date)

# Test a failure case when the parquet file doesn't include a join key
with runner.local_repo(
get_example_repo("example_feature_repo_with_entity_join_key.py").replace(
"%PARQUET_PATH%", driver_stats_path
),
"file",
) as store:

assert store.repo_path is not None

# feast materialize
returncode, output = runner.run_with_output(
[
"materialize",
start_date.isoformat(),
(end_date - timedelta(days=7)).isoformat(),
],
cwd=Path(store.repo_path),
)

assert returncode != 0
assert "feast.errors.FeastJoinKeysDuringMaterialization" in str(output)
Empty file.