Source code for chariot.datasets.files

from chariot import _apis
from chariot.datasets import _utils, models
from chariot_api._openapi.datasets_v3 import models as openapi_models

# Public API of this module: the names exported by a star-import. Every entry
# corresponds to a function defined below in this file.
__all__ = [
    "get_file",
    "get_dataset_files",
    "get_snapshot_files",
    "create_dataset_file",
    "create_snapshot_file",
    "create_dataset_file_and_wait",
    "create_snapshot_file_and_wait",
    "wait_for_file",
]

# Default polling settings (both in seconds) used by wait_for_file and the
# create_*_file_and_wait helpers when the caller does not override them.
DEFAULT_FILE_WAIT_TIMEOUT = 120  # total time to wait for a file to complete
DEFAULT_FILE_WAIT_INTERVAL = 0.5  # pause between successive checks of the file


def get_file(id: str) -> models.File:
    """Get a file by id

    :param id: Id of the file to retrieve
    :type id: str
    :return: The file details
    :rtype: models.File
    :raises RuntimeError: If the API response carries no `data` payload
    """
    payload = _apis.datasets_v3.files_api.get_file(file_id=id).data
    if payload:
        return _utils.convert_to_dataclass(payload.model_dump(), models.File)
    raise RuntimeError("Received malformed response (missing `data`) from get_file")
def get_dataset_files(dataset_id: str) -> list[models.File]:
    """Get files for a dataset

    An empty list is returned when the API response carries no `data`.

    :param dataset_id: Dataset ID to retrieve files for.
    :type dataset_id: str
    :return: File details for the dataset ID
    :rtype: List[models.File]
    """
    api_files = _apis.datasets_v3.files_api.get_dataset_files(dataset_id=dataset_id).data
    # A missing/empty payload falls back to an empty iterable, yielding [].
    return [
        _utils.convert_to_dataclass(entry.model_dump(), models.File)
        for entry in api_files or ()
    ]
def get_snapshot_files(snapshot_id: str) -> list[models.File]:
    """Get files for a snapshot

    An empty list is returned when the API response carries no `data`.

    :param snapshot_id: Snapshot ID to retrieve files for.
    :type snapshot_id: str
    :return: File details for the snapshot ID
    :rtype: List[models.File]
    """
    listing = _apis.datasets_v3.files_api.get_snapshot_files(snapshot_id=snapshot_id)
    if listing.data:
        return [
            _utils.convert_to_dataclass(entry.model_dump(), models.File)
            for entry in listing.data
        ]
    return []
def create_dataset_file(
    *,
    dataset_id: str,
    file_format: models.FileFormat | None = None,
    file_type: models.FileType,
    manifest_type: models.ManifestType | None = None,
    split: models.SplitName | None = None,
) -> models.File:
    """Create or retrieve an archive file or manifest file for a dataset,
    return the file object with location if available.

    The function only starts the file creation process if the file does not
    already exist.

    Note: Creating archive files for datasets is not currently supported and
    will result in an error.

    :param dataset_id: Id of dataset to create file for
    :type dataset_id: str
    :param file_format: File format
    :type file_format: Optional[models.FileFormat]
    :param file_type: File type
    :type file_type: models.FileType
    :param manifest_type: Manifest type
    :type manifest_type: Optional[models.ManifestType]
    :param split: Split
    :type split: Optional[models.SplitName]
    :return: File detail for the newly created or existent file
    :rtype: models.File
    :raises RuntimeError: If the API response carries no `data` payload
    """
    # Every enum-typed argument goes through _utils.enum_value before it is
    # placed on the request model.
    enum_args = {
        "file_format": file_format,
        "file_type": file_type,
        "manifest_type": manifest_type,
        "split": split,
    }
    body = openapi_models.InputCreateFileRequest(
        **{field: _utils.enum_value(value) for field, value in enum_args.items()}
    )

    payload = _apis.datasets_v3.files_api.create_dataset_file(
        dataset_id=dataset_id, body=body
    ).data
    if payload:
        return _utils.convert_to_dataclass(payload.model_dump(), models.File)
    raise RuntimeError("Received malformed response (missing `data`) from create_dataset_file")
def create_snapshot_file(
    *,
    snapshot_id: str,
    file_format: models.FileFormat | None = None,
    file_type: models.FileType,
    manifest_type: models.ManifestType | None = None,
    split: models.SplitName | None = None,
) -> models.File:
    """Create or retrieve an archive file or manifest file for a snapshot,
    return the file object with location if available.

    The function only starts the file creation process if the file does not
    already exist.

    :param snapshot_id: Id of snapshot to create file for
    :type snapshot_id: str
    :param file_format: File format
    :type file_format: Optional[models.FileFormat]
    :param file_type: File type
    :type file_type: models.FileType
    :param manifest_type: Manifest type
    :type manifest_type: Optional[models.ManifestType]
    :param split: Split
    :type split: Optional[models.SplitName]
    :return: File detail for the newly created or existent file
    :rtype: models.File
    :raises RuntimeError: If the API response carries no `data` payload
    """
    # Pass each enum-typed argument through _utils.enum_value before building
    # the request model.
    format_value = _utils.enum_value(file_format)
    type_value = _utils.enum_value(file_type)
    manifest_value = _utils.enum_value(manifest_type)
    split_value = _utils.enum_value(split)
    body = openapi_models.InputCreateFileRequest(
        file_format=format_value,
        file_type=type_value,
        manifest_type=manifest_value,
        split=split_value,
    )

    result = _apis.datasets_v3.files_api.create_snapshot_file(
        snapshot_id=snapshot_id,
        body=body,
    )
    created = result.data
    if not created:
        raise RuntimeError("Received malformed response (missing `data`) from create_snapshot_file")
    return _utils.convert_to_dataclass(created.model_dump(), models.File)
def wait_for_file(
    id: str,
    *,
    timeout: float = DEFAULT_FILE_WAIT_TIMEOUT,
    wait_interval: float = DEFAULT_FILE_WAIT_INTERVAL,
) -> models.File:
    """Polls the given file until it has finished processing.

    A file counts as complete once its ``presigned_url`` is not ``None``.

    :param id: Id of the file to wait for
    :type id: str
    :param timeout: Number of seconds to wait for file to complete (default 120)
    :type timeout: float
    :param wait_interval: Number of seconds between successive calls to check
        the file for completion (default 0.5)
    :type wait_interval: float
    :return: The file details
    :rtype: models.File
    :raises TimeoutError: If the timeout has been reached
    """

    def _poll_once() -> tuple[bool, models.File]:
        # Re-fetch the file and report (is_complete, latest_file).
        latest = get_file(id)
        is_ready = latest.presigned_url is not None
        return is_ready, latest

    timeout_message = f"Timed out waiting for file {id} to complete after {timeout} seconds"
    return _utils.wait_for(_poll_once, timeout_message, timeout, wait_interval)
def create_dataset_file_and_wait(
    *,
    dataset_id: str,
    file_format: models.FileFormat | None = None,
    file_type: models.FileType,
    manifest_type: models.ManifestType | None = None,
    split: models.SplitName | None = None,
    timeout: float = DEFAULT_FILE_WAIT_TIMEOUT,
    wait_interval: float = DEFAULT_FILE_WAIT_INTERVAL,
) -> models.File:
    """Create or retrieve an archive file or manifest file for a dataset.
    Returns the file object with location.

    The function polls the API until the presigned url for the dataset file
    is populated or the timeout is reached.

    Note: Creating archive files for datasets is not currently supported and
    will result in an error.

    :param dataset_id: Id of dataset to create file for
    :type dataset_id: str
    :param file_format: File format
    :type file_format: Optional[models.FileFormat]
    :param file_type: File type
    :type file_type: models.FileType
    :param manifest_type: Manifest type
    :type manifest_type: Optional[models.ManifestType]
    :param split: Split
    :type split: Optional[models.SplitName]
    :param timeout: Number of seconds to wait for file completion
        (default 120 seconds)
    :type timeout: float
    :param wait_interval: Number of seconds between successive calls to check
        the file presigned url (default 0.5)
    :type wait_interval: float
    :return: File detail for the newly created or existent file
    :rtype: models.File
    :raises TimeoutError: If the timeout has been reached
    """
    # Step 1: kick off (or look up) the file; step 2: poll it by id.
    pending = create_dataset_file(
        dataset_id=dataset_id,
        file_format=file_format,
        file_type=file_type,
        manifest_type=manifest_type,
        split=split,
    )
    file_id = pending.id
    return wait_for_file(file_id, timeout=timeout, wait_interval=wait_interval)
def create_snapshot_file_and_wait(
    *,
    snapshot_id: str,
    file_format: models.FileFormat | None = None,
    file_type: models.FileType,
    manifest_type: models.ManifestType | None = None,
    split: models.SplitName | None = None,
    timeout: float = DEFAULT_FILE_WAIT_TIMEOUT,
    wait_interval: float = DEFAULT_FILE_WAIT_INTERVAL,
) -> models.File:
    """Create or retrieve an archive file or manifest file for a snapshot.
    Returns the file object with location.

    The function polls the API until the presigned url for the snapshot file
    is populated or the timeout is reached.

    :param snapshot_id: Id of snapshot to create file for
    :type snapshot_id: str
    :param file_format: File format
    :type file_format: Optional[models.FileFormat]
    :param file_type: File type
    :type file_type: models.FileType
    :param manifest_type: Manifest type
    :type manifest_type: Optional[models.ManifestType]
    :param split: Split
    :type split: Optional[models.SplitName]
    :param timeout: Number of seconds to wait for file completion
        (default 120 seconds)
    :type timeout: float
    :param wait_interval: Number of seconds between successive calls to check
        the file presigned url (default 0.5)
    :type wait_interval: float
    :return: File detail for the newly created or existent file
    :rtype: models.File
    :raises TimeoutError: If the timeout has been reached
    """
    # Forward the creation arguments unchanged, then poll the resulting file id.
    create_kwargs = {
        "snapshot_id": snapshot_id,
        "file_format": file_format,
        "file_type": file_type,
        "manifest_type": manifest_type,
        "split": split,
    }
    pending = create_snapshot_file(**create_kwargs)
    return wait_for_file(pending.id, timeout=timeout, wait_interval=wait_interval)