Source code for chariot.inference_store.models

import ast
from datetime import datetime
from enum import Enum

import numpy as np
from pydantic import BaseModel, ConfigDict, Field


class PaginationSortDirection(str, Enum):
    """Sort direction accepted by paginated requests.

    Members are ``str`` subclasses, so they compare equal to their raw
    string values.
    """

    ASCENDING = "asc"
    DESCENDING = "desc"
class PaginationSortField(str, Enum):
    """Field by which paginated results can be sorted."""

    CREATED_AT = "created_at"
class PaginationGranularInferenceSortField(str, Enum):
    """Fields by which granular inference results can be sorted."""

    CREATED_AT = "created_at"
    SCORE = "score"
    LABEL = "label"
class PaginationEmbeddingSortField(str, Enum):
    """Fields by which results from embedding models can be sorted."""

    CREATED_AT = "created_at"
    EMBEDDING_DISTANCE = "embedding_distance"
class TaskType(str, Enum):
    """Task types supported by the inference store.

    The values are the human-readable, title-cased task names.
    """

    IMAGE_CLASSIFICATION = "Image Classification"
    OBJECT_DETECTION = "Object Detection"
    IMAGE_SEGMENTATION = "Image Segmentation"
    ORIENTED_OBJECT_DETECTION = "Oriented Object Detection"
    IMAGE_EMBEDDING = "Image Embedding"
    TEXT_EMBEDDING = "Text Embedding"
class InferenceAction(str, Enum):
    """Inference actions supported by the inference store.

    Which actions are valid depends on the task type of the model.
    """

    PREDICT = "predict"
    PREDICT_PROBA = "predict_proba"
    DETECT = "detect"
    EMBED = "embed"
class RetentionTaskState(str, Enum):
    """States a retention task can be in."""

    PENDING = "pending"
    SCHEDULED = "scheduled"
    RUNNING = "running"
    FAILED = "failed"
    STOPPED = "stopped"
    COMPLETE = "complete"
class MetadataFilterOperator(str, Enum):
    """Comparison operators usable in a metadata filter."""

    EQUAL = "="
    NOT_EQUAL = "!="
    GREATER = ">"
    LESS = "<"
    GREATER_OR_EQUAL = ">="
    LESS_OR_EQUAL = "<="
    IN = "in"
class MetadataFilterType(str, Enum):
    """Value types a metadata filter can compare against.

    The ``json.*`` variants carry a ``json.`` prefix in their values.
    """

    STRING = "string"
    INT = "int"
    FLOAT = "float"
    JSON_STRING = "json.string"
    JSON_INT = "json.int"
    JSON_FLOAT = "json.float"
class MetadataStatisticsType(str, Enum):
    """Metadata value types for which statistics can be requested."""

    STRING = "string"
    INT = "int"
    FLOAT = "float"
class MetadataType(str, Enum):
    """Value types that extended metadata can be stored as."""

    STRING = "string"
    INT = "int"
    FLOAT = "float"
    JSON = "json"
class DeleteAction(str, Enum):
    """Available delete actions: soft or hard."""

    SOFT = "soft"
    HARD = "hard"
class EmbeddingDistanceMetric(str, Enum):
    """Distance metrics supported when searching embeddings."""

    L2 = "l2"
    NEGATIVE_DOT_PRODUCT = "negative_dot_product"
    COSINE = "cosine"
class TimeWindowFilter(BaseModel):
    """Restrict results to a time window.

    Both bounds are plain strings in the datetime format
    ``YYYY-MM-DDTHH:MM:SSZ``; this model does not parse or validate them.
    An existing datetime object can be formatted with
    ``dt.strftime("%Y-%m-%dT%H:%M:%S.%fZ")``.

    :param start: The left time bound. Defaults to 1970-01-01T00:00:00Z
        if not supplied.
    :type start: Optional[str]
    :param end: The right time bound. Defaults to now if not supplied.
    :type end: Optional[str]
    """

    start: str | None = None
    end: str | None = None
class MetadataFilter(BaseModel):
    """Get inferences matching a given metadata inequality/constraint.

    Enum fields are stored as their raw string values
    (``use_enum_values=True``).

    :param key: The name of the metadata key. Specify json keys using dot
        notation. For example, to filter against the nested key "y" in
        {"x": {"y": 10, "z": 5}} use ``x.y``.
    :type key: str
    :param operator: The equality/inequality operator to compare a value
        against (=, !=, >, <, >=, <=, in).
    :type operator: models.MetadataFilterOperator
    :param type: The metadata value type (string, int, float, json.string,
        json.int, json.float).
    :type type: models.MetadataFilterType
    :param value: The value the operator compares against. If using the 'in'
        operator, pass a list of unquoted values using brackets and comma
        separation, ``[v1, v2, ..., vn]``.
    :type value: str
    """

    key: str
    operator: MetadataFilterOperator
    type: MetadataFilterType
    value: str

    @staticmethod
    def form_values_for_in_operator(
        values_type: MetadataFilterType, values: list[str] | list[int] | list[float]
    ) -> str | None:
        """Render ``values`` in the bracketed form expected by the 'in' operator.

        :param values_type: The metadata type of the values.
        :type values_type: models.MetadataFilterType
        :param values: The values to render.
        :type values: list[str] | list[int] | list[float]
        :return: ``"[v1, v2, ..., vn]"`` with unquoted items for STRING,
            ``str(values)`` for INT and FLOAT, and ``None`` for any other
            type (including the ``json.*`` types).
        :rtype: str | None
        """
        if values_type == MetadataFilterType.STRING:
            # Join the raw items instead of stripping quotes from repr(list):
            # the repr-based approach dropped apostrophes inside values and
            # leaked repr escaping (double quotes, doubled backslashes).
            return "[" + ", ".join(str(item) for item in values) + "]"
        if values_type in (MetadataFilterType.INT, MetadataFilterType.FLOAT):
            return str(values)
        return None

    model_config = ConfigDict(use_enum_values=True)
class EmbeddingFilter(BaseModel):
    """Get embeddings using a query embedding and distance metric.

    :param query_embedding: The query embedding to search against.
    :type query_embedding: List[float]
    :param distance_metric: The distance metric operator (defaults to cosine).
    :type distance_metric: Optional[str]
    """

    query_embedding: list[float]
    # Defaults are not validated by pydantic, so `use_enum_values` would not
    # convert an Enum default. Use the raw value so the default has the same
    # plain-str type as an explicitly supplied metric.
    distance_metric: str | None = EmbeddingDistanceMetric.COSINE.value

    @staticmethod
    def form_query_embedding(embedding: list | np.ndarray) -> str | None:
        """Render an embedding as its list string representation.

        :param embedding: The embedding as a list or numpy array.
        :type embedding: list | np.ndarray
        :return: ``str`` of the embedding as a Python list, or ``None`` when
            ``embedding`` is neither a list nor a numpy array.
        :rtype: str | None
        """
        if isinstance(embedding, np.ndarray):
            return str(embedding.tolist())
        if isinstance(embedding, list):
            return str(embedding)
        # Unsupported input types keep the historical behaviour of yielding None.
        return None

    model_config = ConfigDict(use_enum_values=True)
class GeoPoint(BaseModel):
    """A point on the globe.

    The documented ranges are not enforced by this model.

    :param latitude: Latitude in decimal format, between -90 and 90.
    :type latitude: float
    :param longitude: Longitude in decimal format, between -180 and 180.
    :type longitude: float
    """

    latitude: float
    longitude: float
class GeoCircle(BaseModel):
    """A circular search area described by a center point and a radius.

    :param center: The center point of the circle.
    :type center: GeoPoint
    :param radius: The radius in meters to expand from the center.
    :type radius: float
    """

    center: GeoPoint
    radius: float
class GeoRectangle(BaseModel):
    """A rectangular search area described by two points on the globe.

    :param p1: The first point on the globe.
    :type p1: GeoPoint
    :param p2: The second point on the globe.
    :type p2: GeoPoint
    """

    p1: GeoPoint
    p2: GeoPoint
class GeolocationFilter(BaseModel):
    """A geographic search area, given as a circle and/or a rectangle.

    Both fields are optional and default to ``None``.

    :param gps_coordinates_circle: The circular area on the globe to search
        for inferences.
    :type gps_coordinates_circle: models.GeoCircle
    :param gps_coordinates_rectangle: The rectangular area on the globe to
        search for inferences.
    :type gps_coordinates_rectangle: models.GeoRectangle
    """

    gps_coordinates_circle: GeoCircle | None = None
    gps_coordinates_rectangle: GeoRectangle | None = None
class DeconstructedInferenceFilter(BaseModel):
    """Filter inferences by label and confidence score.

    Every field is optional and defaults to ``None``.

    :param labels: The list of labels to filter on.
    :type labels: Optional[List[str]]
    :param minimum_score: The minimum confidence score.
    :type minimum_score: Optional[float]
    :param maximum_score: The maximum confidence score.
    :type maximum_score: Optional[float]
    """

    labels: list[str] | None = None
    minimum_score: float | None = None
    maximum_score: float | None = None
class BaseInferenceFilter(BaseModel):
    """Collection of optional filters for selecting inferences.

    Every field defaults to ``None``.

    :param inference_action_filter: Get inferences with a given inference
        action.
    :type inference_action_filter: str
    :param data_hash_filter: Get inferences with a given data hash.
    :type data_hash_filter: str
    :param data_source_filter: Get inferences with a given data source.
    :type data_source_filter: str
    :param time_window_filter: Get inferences within a time window.
    :type time_window_filter: models.TimeWindowFilter
    :param metadata_filter: Get inferences matching the intersection of one
        or more metadata filters.
    :type metadata_filter: List[models.MetadataFilter]
    :param location_filter: Get inferences in a circular or rectangular area
        on the globe.
    :type location_filter: models.GeolocationFilter
    :param deconstructed_inference_filter: Get inferences with certain labels
        or confidence scores.
    :type deconstructed_inference_filter: models.DeconstructedInferenceFilter
    """

    inference_action_filter: str | None = None
    data_hash_filter: str | None = None
    data_source_filter: str | None = None
    time_window_filter: TimeWindowFilter | None = None
    metadata_filter: list[MetadataFilter] | None = None
    location_filter: GeolocationFilter | None = None
    deconstructed_inference_filter: DeconstructedInferenceFilter | None = None
class NewRegisterModelRequest(BaseModel):
    """Request payload for registering a model for inference storage.

    ``task_type`` is stored as its raw string value (``use_enum_values=True``).

    :param model_id: The model id.
    :type model_id: str
    :param project_id: The project id.
    :type project_id: str
    :param task_type: The model task type.
    :type task_type: TaskType
    :param embedding_size: The dimension of the embeddings produced.
        Defaults to 0.
    :type embedding_size: int
    """

    model_id: str
    project_id: str
    task_type: TaskType
    embedding_size: int = 0

    # Empty protected_namespaces allows the ``model_``-prefixed field name.
    model_config = ConfigDict(protected_namespaces=(), use_enum_values=True)
class NewStandardMetadataRequest(BaseModel):
    """Standard metadata to attach to an inference.

    Currently supported (task type, inference action) pairs:

    - Image Classification: [predict, predict_proba, embed]
    - Object Detection: [detect]
    - Image Segmentation: [predict, predict_proba]
    - Image Embedding: [embed]

    Enum fields are stored as their raw string values
    (``use_enum_values=True``).

    :param data_hash: The SHA256 hexdigest of the data inferred upon. Use the
        helper function get_data_hash if necessary.
    :type data_hash: str
    :param data_size: The size of the input data.
    :type data_size: int
    :param task_type: The task type of the model.
    :type task_type: models.TaskType
    :param inference_action: The inference action passed to the inference
        server.
    :type inference_action: models.InferenceAction
    :param data_source: An optional field to express the origin of the
        inference data.
    :type data_source: Optional[str]
    :param latitude: An optional field to store the latitude the inference
        data was captured at.
    :type latitude: Optional[float]
    :param longitude: An optional field to store the longitude the inference
        data was captured at.
    :type longitude: Optional[float]
    """

    data_hash: str
    data_size: int
    task_type: TaskType
    inference_action: InferenceAction
    data_source: str | None = None
    latitude: float | None = None
    longitude: float | None = None

    model_config = ConfigDict(use_enum_values=True)
class NewExtendedMetadataRequest(BaseModel):
    """Extended/custom metadata entry to attach to an inference.

    ``type`` is stored as its raw string value (``use_enum_values=True``).

    :param key: The name of the metadata key.
    :type key: str
    :param type: The metadata value type (string, int, float, json).
    :type type: models.MetadataType
    :param value: The metadata value.
    :type value: str
    """

    key: str
    type: MetadataType
    value: str

    model_config = ConfigDict(use_enum_values=True)
class NewMetadataCollectionRequest(BaseModel):
    """Collect the standard and extended metadata associated to an inference.

    :param standard_metadata: The standard metadata.
    :type standard_metadata: models.NewStandardMetadataRequest
    :param extended_metadata: The extended metadata. Defaults to an empty
        list.
    :type extended_metadata: List[models.NewExtendedMetadataRequest]
    """

    standard_metadata: NewStandardMetadataRequest
    # A factory instead of a shared ``[]`` literal, matching the list
    # defaults used by the other request models in this module.
    extended_metadata: list[NewExtendedMetadataRequest] = Field(default_factory=list)
class NewInferenceAndMetadataCollection(BaseModel):
    """Helper object to collect inference, standard and extended metadata.

    The inference-store stores the inference in whatever format the
    model/inference server natively returns. In order to facilitate label
    search, filter by confidence score, and standardize region/geometry
    related elements, the deconstructed_inference field must be specified.
    If not specified, the inference will still be stored, but the inference
    will not be retrievable by the before mentioned filters.

    For example, an object detection inference might be returned in the
    following format:

    .. code-block:: python

        {
            "detection_boxes": [[10.0, 10.0, 20.0, 20.0], [30.0, 30.0, 40.0, 40.0]],
            "detection_classes": ["car", "truck"],
            "detection_scores": [0.9, 0.95]
        }

    Another, equally well trained model might return a very similar result
    but in the following format:

    .. code-block:: python

        [
            {"bounding_box": [10.0, 10.0, 20.0, 20.0], "score": 0.9, "label": "car"},
            {"bounding_box": [30.0, 30.0, 40.0, 40.0], "score": 0.95, "label": "truck"}
        ]

    Thus, a standard task-type conditional structure is needed so models with
    different output formats can speak the same language.

    Call the encode method for the ability to upload the corresponding json
    data to blob storage.

    :param image_classification: The collection of image classification
        inferences.
    :type image_classification: Optional[List[dict]]
    :param object_detection: The collection of object detection inferences.
    :type object_detection: Optional[List[dict]]
    :param image_segmentation: The collection of image segmentation
        inferences.
    :type image_segmentation: Optional[List[dict]]
    :param oriented_object_detection: The collection of oriented object
        detection inferences.
    :type oriented_object_detection: Optional[List[dict]]
    :param embedding: An embedding.
    :type embedding: Optional[List[float]]
    :param metadata: The collection of metadata associated to the inference.
    :type metadata: models.NewMetadataCollectionRequest
    """

    # Annotated as optional so that an explicit ``None`` validates the same
    # way as the (unvalidated) ``None`` default does.
    image_classification: list[dict] | None = None
    object_detection: list[dict] | None = None
    image_segmentation: list[dict] | None = None
    oriented_object_detection: list[dict] | None = None
    embedding: list[float] | None = None
    metadata: NewMetadataCollectionRequest

    def encode(self) -> str:
        """Serialize this collection to a JSON string.

        :return: The JSON representation of the model.
        :rtype: str
        """
        # ``BaseModel.json()`` is deprecated in pydantic v2 and merely
        # forwards to ``model_dump_json()`` after emitting a warning.
        return self.model_dump_json()
class NewInferenceStorageRequest(BaseModel):
    """Request payload for storing an inference.

    :param model_id: The id of the model.
    :type model_id: str
    :param inference_id: The id of the inference.
    :type inference_id: str
    :param data: The collection of inference and metadata.
    :type data: models.NewInferenceAndMetadataCollection
    :param data_storage_key: An optional data storage key returned from the
        upload.upload_data function.
    :type data_storage_key: str
    :param is_protected: Whether the inference and its associated data should
        be protected from deletion by retention policy. Defaults to False.
    :type is_protected: bool
    """

    model_id: str
    inference_id: str
    data: NewInferenceAndMetadataCollection
    data_storage_key: str | None = None
    is_protected: bool = False

    # Empty protected_namespaces allows the ``model_``-prefixed field name.
    model_config = ConfigDict(protected_namespaces=())
class NewGetInferencesRequest(BaseModel):
    """Request payload for fetching inferences that match a set of filters.

    :param filters: A collection of filters. Defaults to an empty
        ``BaseInferenceFilter``.
    :type filters: models.BaseInferenceFilter
    :param embedding_filters: An extended set of filters for embedding
        models.
    :type embedding_filters: models.EmbeddingFilter
    :param pagination: Get inferences matching pagination constraints.
    :type pagination: models.Pagination
    :param presign: Whether to presign data_storage_key(s). Defaults to
        False.
    :type presign: bool
    """

    filters: BaseInferenceFilter = BaseInferenceFilter()
    embedding_filters: EmbeddingFilter | None = None
    # NOTE(review): ``Pagination`` is not defined in the visible part of this
    # module; confirm it is declared or imported elsewhere.
    pagination: Pagination | None = None
    presign: bool = False
class NewGetMetadataStatisticsRequest(BaseModel):
    """Request payload for fetching metadata statistics.

    ``type`` is stored as its raw string value (``use_enum_values=True``).

    :param filters: A collection of filters.
    :type filters: models.BaseInferenceFilter
    :param key: Filter by metadata key.
    :type key: str
    :param type: Filter by metadata type.
    :type type: models.MetadataStatisticsType
    :param distribution_bin_count: Number of bins in the distribution.
        Defaults to producing 10 bins if not specified.
    :type distribution_bin_count: int
    :param distribution_bin_width: Width of a bin within the distribution.
        Count and width cannot both be specified.
    :type distribution_bin_width: float
    :param distribution_minimum_value: The minimum value at which binning
        begins. Defaults to the minimum value within the time window
        specified.
    :type distribution_minimum_value: float
    :param distribution_maximum_value: The maximum value at which binning
        ends. Defaults to the maximum value within the time window specified.
    :type distribution_maximum_value: float
    """

    filters: BaseInferenceFilter | None = None
    key: str
    type: MetadataStatisticsType
    distribution_bin_count: int | None = None
    distribution_bin_width: float | None = None
    distribution_maximum_value: float | None = None
    distribution_minimum_value: float | None = None

    model_config = ConfigDict(use_enum_values=True)
class NewRetentionTaskRequest(BaseModel):
    """Request payload for creating a retention task.

    :param dry_run: If true, returns the number of inferences that would be
        deleted if the retention policy was fully executed.
    :type dry_run: bool
    :param retention_policy_id: The id of the retention policy to run.
    :type retention_policy_id: str
    """

    dry_run: bool
    retention_policy_id: str
class NewGetRetentionTasksRequest(BaseModel):
    """Request payload for filtering retention tasks.

    Every field is optional and defaults to ``None``. ``state_filter`` is
    stored as its raw string value (``use_enum_values=True``).

    :param retention_policy_id: Filter by retention policy id.
    :type retention_policy_id: str
    :param state_filter: Filter by retention task state.
    :type state_filter: models.RetentionTaskState
    :param time_window_filter: Get retention tasks within a time window.
    :type time_window_filter: models.TimeWindowFilter
    :param pagination: Get retention tasks matching pagination constraints.
    :type pagination: models.Pagination
    """

    retention_policy_id: str | None = None
    state_filter: RetentionTaskState | None = None
    time_window_filter: TimeWindowFilter | None = None
    # NOTE(review): ``Pagination`` is not defined in the visible part of this
    # module; confirm it is declared or imported elsewhere.
    pagination: Pagination | None = None

    model_config = ConfigDict(use_enum_values=True)
class NewRetentionPolicyRequest(BaseModel):
    """Helper object to create a retention policy.

    A value of -1 for maximum_record_age indicates that inferences should
    never be deleted. The maximum_blob_age must be greater than or equal to
    0, and must also be less than or equal to the maximum_record_age. These
    constraints are not checked by this model.

    :param automated_interval: Interval to automatically run retention policy
        in hours. Set to 0 if manual executions are desired.
    :type automated_interval: int
    :param maximum_record_age: The maximum age (in hours) since now in which
        an inference and its associated data is safe from deletion.
    :type maximum_record_age: int
    :param maximum_blob_age: The maximum age (in hours) since now in which a
        blob is safe from deletion.
    :type maximum_blob_age: int
    :param delete_action: Whether to 'soft' or 'hard' delete the database
        records. Defaults to 'soft'.
    :type delete_action: models.DeleteAction
    """

    automated_interval: int
    maximum_record_age: int
    maximum_blob_age: int
    # Defaults bypass validation, so without ``validate_default`` the default
    # would stay an Enum member while explicit values are converted to their
    # raw string by ``use_enum_values``.
    delete_action: DeleteAction = Field(default=DeleteAction.SOFT, validate_default=True)

    model_config = ConfigDict(use_enum_values=True)
class NewExportTaskRequest(BaseModel):
    """Request payload selecting the inferences for an export task.

    :param filters: A collection of filters. Defaults to an empty
        ``BaseInferenceFilter``.
    :type filters: models.BaseInferenceFilter
    :param include_inference_ids: The list of additional inference ids to
        include. Defaults to an empty list.
    :type include_inference_ids: list[str]
    :param exclude_inference_ids: The list of additional inference ids to
        exclude. Defaults to an empty list.
    :type exclude_inference_ids: list[str]
    :param include_inferences_as_annotations: Whether to include the
        inferences as annotations (only eligible for version=v2 inferences).
        Defaults to False.
    :type include_inferences_as_annotations: bool
    :param include_custom_metadata: Whether to include custom metadata
        attached to the inference. Defaults to False.
    :type include_custom_metadata: bool
    """

    filters: BaseInferenceFilter = BaseInferenceFilter()
    include_inference_ids: list[str] = Field(default_factory=list)
    exclude_inference_ids: list[str] = Field(default_factory=list)
    include_inferences_as_annotations: bool = False
    include_custom_metadata: bool = False
class Model(BaseModel):
    """An inference store model resource.

    :param model_id: The model id.
    :type model_id: str
    :param created_at: A timestamp of when the model was created.
    :type created_at: datetime
    :param updated_at: A timestamp of when the model was updated.
    :type updated_at: datetime
    :param embedding_size: The dimension of the embeddings produced.
    :type embedding_size: int
    """

    model_id: str
    created_at: datetime
    updated_at: datetime
    embedding_size: int

    # Empty protected_namespaces allows the ``model_``-prefixed field name.
    model_config = ConfigDict(protected_namespaces=())
class Metadata(BaseModel):
    """An inference store metadata resource.

    :param key: The name of the metadata key.
    :type key: str
    :param type: The metadata value type (string, int, float, json).
    :type type: str
    :param value: The metadata value.
    :type value: str
    """

    key: str
    type: str
    value: str
class MetadataStatistics(BaseModel):
    """An inference store metadata statistics resource.

    Every field is optional and defaults to ``None``.

    :param count: The number of observations given the request filters.
    :type count: Optional[int]
    :param distribution: The distribution of values.
    :type distribution: Optional[Dict[str, int]]
    :param min: The minimum value in the set of metadata values.
    :type min: Optional[float]
    :param max: The maximum value in the set of metadata values.
    :type max: Optional[float]
    """

    count: int | None = None
    distribution: dict[str, int] | None = None
    min: float | None = None
    max: float | None = None
class MetadataKeyTypeCounts(BaseModel):
    """An inference store metadata key type count resource.

    :param counts: The count of each metadata (key, type) pair, as a nested
        mapping of string to string to int.
    :type counts: Dict[str, Dict[str, int]]
    """

    counts: dict[str, dict[str, int]]
class Inference(BaseModel):
    """An inference store inference resource.

    :param model_id: The model id.
    :type model_id: str
    :param inference_id: The inference id.
    :type inference_id: str
    :param created_at: A timestamp of when the inference was created.
    :type created_at: datetime
    :param updated_at: A timestamp of when the inference was updated.
    :type updated_at: datetime
    :param inference_action: The inference action.
    :type inference_action: str
    :param data: The inference data. Returned as an arbitrary string when the
        associated model is not registered as an embedding model.
    :type data: Optional[str]
    :param embedding_distance: The embedding distance from a query embedding.
    :type embedding_distance: Optional[float]
    :param metadata: The collection of metadata associated to the inference.
    :type metadata: Optional[List[Metadata]]
    :param data_hash: The hash of the inference input data.
    :type data_hash: str
    :param data_source: The data source of the inference.
    :type data_source: Optional[str]
    :param data_coordinates: A set of geospatial coordinates defining where
        the inference occurred.
    :type data_coordinates: Optional[GeoPoint]
    :param data_storage_key: The internal data storage key for the inference
        input data.
    :type data_storage_key: Optional[str]
    :param presigned_url: A presigned url to download the inference input
        data.
    :type presigned_url: Optional[str]
    :param is_protected: Whether the inference and its associated data should
        be protected from deletion by retention policy. Defaults to False.
    :type is_protected: bool
    :param version: The inference version.
    :type version: str
    """

    model_id: str
    inference_id: str
    created_at: datetime
    updated_at: datetime
    inference_action: str
    data: str | None = None
    embedding_distance: float | None = None
    metadata: list[Metadata] | None = None
    data_hash: str
    data_source: str | None = None
    data_coordinates: GeoPoint | None = None
    data_storage_key: str | None = None
    presigned_url: str | None = None
    is_protected: bool = False
    version: str

    # Empty protected_namespaces allows the ``model_``-prefixed field name.
    model_config = ConfigDict(protected_namespaces=())

    @property
    def structured_embedding(self) -> list[int | float] | None:
        """Parse ``data`` as a Python literal.

        :return: An empty list when ``data`` is ``None``; otherwise whatever
            ``ast.literal_eval`` produces for ``data``. Any exception raised
            by ``ast.literal_eval`` for a non-literal string propagates.
        :rtype: list[int | float] | None
        """
        raw = self.data
        return [] if raw is None else ast.literal_eval(raw)
class RetentionTask(BaseModel):
    """An inference store retention task resource.

    :param id: The retention task id.
    :type id: str
    :param retention_policy_id: The retention policy id.
    :type retention_policy_id: str
    :param deleted_record_count: The count of all records currently deleted.
    :type deleted_record_count: int
    :param deleted_blob_count: The count of all blobs currently deleted.
    :type deleted_blob_count: int
    :param total_record_count: The expected total count of all records to be
        deleted.
    :type total_record_count: int
    :param total_blob_count: The expected total count of all blobs to be
        deleted.
    :type total_blob_count: int
    :param state: The state of the retention task.
    :type state: str
    :param maximum_record_age_limit: The timestamp in which records are safe
        from deletion.
    :type maximum_record_age_limit: datetime
    :param maximum_blob_age_limit: The timestamp in which blobs are safe from
        deletion.
    :type maximum_blob_age_limit: datetime
    """

    id: str
    retention_policy_id: str
    deleted_record_count: int
    deleted_blob_count: int
    total_record_count: int
    total_blob_count: int
    state: str
    maximum_record_age_limit: datetime
    maximum_blob_age_limit: datetime
class RetentionPolicy(BaseModel):
    """An inference store retention policy resource.

    :param id: The retention policy id.
    :type id: str
    :param model_id: The model id.
    :type model_id: str
    :param delete_action: Whether records are soft deleted or hard deleted.
    :type delete_action: str
    :param maximum_record_age: The maximum age (in hours) in which an
        inference and its associated data is safe from deletion.
    :type maximum_record_age: int
    :param maximum_blob_age: The maximum age (in hours) in which a blob is
        safe from deletion.
    :type maximum_blob_age: int
    :param automated_interval: The interval (in hours) in which this
        retention policy will be automatically run.
    :type automated_interval: Optional[int]
    :param last_scheduled_at: The last time a retention task for this
        retention policy was run.
    :type last_scheduled_at: Optional[datetime]
    """

    id: str
    model_id: str
    delete_action: str
    maximum_record_age: int
    maximum_blob_age: int
    automated_interval: int | None = None
    last_scheduled_at: datetime | None = None

    # Empty protected_namespaces allows the ``model_``-prefixed field name.
    model_config = ConfigDict(protected_namespaces=())
class ExportTask(BaseModel):
    """An inference store export task resource.

    :param id: The export task id.
    :type id: str
    :param progress_count: The current number of inferences added to the
        archive file.
    :type progress_count: int
    :param expected_count: The expected number of inferences to be added to
        the archive file.
    :type expected_count: int
    :param state: The export task state.
    :type state: str
    :param presigned_url: A presigned url to the archive if the export task
        is complete.
    :type presigned_url: Optional[str]
    """

    id: str
    progress_count: int
    expected_count: int
    state: str
    presigned_url: str | None = None
class DataUpload(BaseModel):
    """An inference store data upload resource.

    :param data_presigned_url: A presigned url to upload the inference input
        data to.
    :type data_presigned_url: str
    :param data_storage_key: The internal storage key to the inference input
        data.
    :type data_storage_key: str
    """

    data_presigned_url: str
    data_storage_key: str
class MetadataUpload(BaseModel):
    """An inference store metadata upload resource.

    :param metadata_presigned_url: A presigned url to upload the inference
        and metadata to.
    :type metadata_presigned_url: str
    :param metadata_storage_key: The internal storage key to the inference
        and metadata.
    :type metadata_storage_key: str
    """

    metadata_presigned_url: str
    metadata_storage_key: str