Useful patterns
With the sdk we ship useful functions that handle common programmatic functionality for rAPId.
Dataframe Upload
Below is a simple example for uploading a Pandas DataFrame to the API.
import pandas as pd
from rapid import Rapid
from rapid.patterns import dataset
from rapid.items.schema import SchemaMetadata, SensitivityLevel, Owner
from rapid.exceptions import DataFrameUploadValidationException
rapid = Rapid()
raw_data = [{"a": 1, "b": 2, "c": 3}, {"a": 10, "b": 20, "c": 30}]
df = pd.DataFrame(raw_data)
metadata = SchemaMetadata(
layer="default",
domain="mydomain",
dataset="mydataset",
owners=[Owner(name="myname", email="myemail@email.com")],
sensitivity=SensitivityLevel.PUBLIC.value,
)
try:
dataset.upload_and_create_dataset(
rapid=rapid, df=df, metadata=metadata, upgrade_schema_on_fail=False
)
except DataFrameUploadValidationException:
print("Incorrect DataFrame schema")
Update Schema
Now going forward say for instance we now expect that for column c we can expect some values to be floating points, we want to update the schema.
import pandas as pd
from rapid import Rapid
from rapid.patterns import dataset
from rapid.items.schema import SchemaMetadata, SensitivityLevel, Owner, Column
from rapid.exceptions import ColumnNotDifferentException
rapid = Rapid()
raw_data = [{"a": 1, "b": 2, "c": 3}, {"a": 10, "b": 20, "c": 30}]
df = pd.DataFrame(raw_data)
metadata = SchemaMetadata(
layer="default",
domain="mydomain",
dataset="mydataset",
owners=[Owner(name="myname", email="myemail@email.com")],
_sensitivity=SensitivityLevel.PUBLIC.value,
)
try:
dataset.update_schema_to_dataframe(
rapid=rapid,
df=df,
metadata=metadata,
new_columns=[Column(name="c", data_type="Float64")],
)
except ColumnNotDifferentException:
print("Columns not different.")