from chariot import _apis
from chariot.datasets import _utils, models
from chariot_api._openapi.datasets_v3 import models as openapi_models
# Names exported by a star-import of this module.
__all__ = [
"get_file",
"get_dataset_files",
"get_snapshot_files",
"create_dataset_file",
"create_snapshot_file",
"create_dataset_file_and_wait",
"create_snapshot_file_and_wait",
"wait_for_file",
]
# Default `timeout` (in seconds) for wait_for_file and the *_and_wait helpers.
DEFAULT_FILE_WAIT_TIMEOUT = 120
# Default `wait_interval` (in seconds) between successive polls of a file.
DEFAULT_FILE_WAIT_INTERVAL = 0.5
[docs]
def get_file(id: str) -> models.File:
    """Get a file by id

    :param id: Id of the file to retrieve
    :type id: str
    :return: The file details
    :rtype: models.File
    :raises RuntimeError: If the API response does not contain `data`
    """
    payload = _apis.datasets_v3.files_api.get_file(file_id=id).data
    if payload:
        return _utils.convert_to_dataclass(payload.model_dump(), models.File)
    raise RuntimeError("Received malformed response (missing `data`) from get_file")
[docs]
def get_dataset_files(dataset_id: str) -> list[models.File]:
    """List the files that belong to a dataset

    :param dataset_id: Id of the dataset whose files should be returned.
    :type dataset_id: str
    :return: One entry per file reported for the dataset; an empty list when
        the response carries no data
    :rtype: list[models.File]
    """
    result = _apis.datasets_v3.files_api.get_dataset_files(dataset_id=dataset_id)
    # A missing or empty `data` payload is treated as "no files".
    records = result.data or []
    return [
        _utils.convert_to_dataclass(record.model_dump(), models.File)
        for record in records
    ]
[docs]
def get_snapshot_files(snapshot_id: str) -> list[models.File]:
    """List the files that belong to a snapshot

    :param snapshot_id: Id of the snapshot whose files should be returned.
    :type snapshot_id: str
    :return: One entry per file reported for the snapshot; an empty list when
        the response carries no data
    :rtype: list[models.File]
    """
    result = _apis.datasets_v3.files_api.get_snapshot_files(snapshot_id=snapshot_id)
    # A missing or empty `data` payload is treated as "no files".
    records = result.data or []
    return [
        _utils.convert_to_dataclass(record.model_dump(), models.File)
        for record in records
    ]
[docs]
def create_dataset_file(
    *,
    dataset_id: str,
    file_format: models.FileFormat | None = None,
    file_type: models.FileType,
    manifest_type: models.ManifestType | None = None,
    split: models.SplitName | None = None,
) -> models.File:
    """Create or retrieve an archive file or manifest file for a dataset.

    The returned file object includes its location if available. File creation
    is only started when the file does not already exist.

    Note: Creating archive files for datasets is not currently supported and
    will result in an error.

    :param dataset_id: Id of the dataset to create the file for
    :type dataset_id: str
    :param file_format: File format
    :type file_format: Optional[models.FileFormat]
    :param file_type: File type
    :type file_type: models.FileType
    :param manifest_type: Manifest type
    :type manifest_type: Optional[models.ManifestType]
    :param split: Split
    :type split: Optional[models.SplitName]
    :return: File detail for the newly created or existent file
    :rtype: models.File
    :raises RuntimeError: If the API response does not contain `data`
    """
    # Each enum-typed argument goes through _utils.enum_value before it is
    # handed to the generated request model.
    format_value = _utils.enum_value(file_format)
    type_value = _utils.enum_value(file_type)
    manifest_value = _utils.enum_value(manifest_type)
    split_value = _utils.enum_value(split)
    body = openapi_models.InputCreateFileRequest(
        file_format=format_value,
        file_type=type_value,
        manifest_type=manifest_value,
        split=split_value,
    )

    payload = _apis.datasets_v3.files_api.create_dataset_file(
        dataset_id=dataset_id, body=body
    ).data
    if payload:
        return _utils.convert_to_dataclass(payload.model_dump(), models.File)
    raise RuntimeError("Received malformed response (missing `data`) from create_dataset_file")
[docs]
def create_snapshot_file(
    *,
    snapshot_id: str,
    file_format: models.FileFormat | None = None,
    file_type: models.FileType,
    manifest_type: models.ManifestType | None = None,
    split: models.SplitName | None = None,
) -> models.File:
    """Create or retrieve an archive file or manifest file for a snapshot.

    The returned file object includes its location if available. File creation
    is only started when the file does not already exist.

    :param snapshot_id: Id of the snapshot to create the file for
    :type snapshot_id: str
    :param file_format: File format
    :type file_format: Optional[models.FileFormat]
    :param file_type: File type
    :type file_type: models.FileType
    :param manifest_type: Manifest type
    :type manifest_type: Optional[models.ManifestType]
    :param split: Split
    :type split: Optional[models.SplitName]
    :return: File detail for the newly created or existent file
    :rtype: models.File
    :raises RuntimeError: If the API response does not contain `data`
    """
    # Each enum-typed argument goes through _utils.enum_value before it is
    # handed to the generated request model.
    request = openapi_models.InputCreateFileRequest(
        file_format=_utils.enum_value(file_format),
        file_type=_utils.enum_value(file_type),
        manifest_type=_utils.enum_value(manifest_type),
        split=_utils.enum_value(split),
    )
    response = _apis.datasets_v3.files_api.create_snapshot_file(
        snapshot_id=snapshot_id, body=request
    )
    if not response.data:
        raise RuntimeError("Received malformed response (missing `data`) from create_snapshot_file")
    return _utils.convert_to_dataclass(response.data.model_dump(), models.File)
[docs]
def wait_for_file(
    id: str,
    *,
    timeout: float = DEFAULT_FILE_WAIT_TIMEOUT,
    wait_interval: float = DEFAULT_FILE_WAIT_INTERVAL,
) -> models.File:
    """Poll a file until its presigned url is populated.

    :param id: Id of the file to wait for
    :type id: str
    :param timeout: Number of seconds to wait for the file to complete (default 120)
    :type timeout: float
    :param wait_interval: Number of seconds between successive checks of the file (default 0.5)
    :type wait_interval: float
    :return: The file details
    :rtype: models.File
    :raises TimeoutError: If the timeout has been reached
    """

    def _check_ready() -> tuple[bool, models.File]:
        # Re-fetch the file on every poll; it counts as complete once a
        # presigned url is present.
        latest = get_file(id)
        is_ready = latest.presigned_url is not None
        return is_ready, latest

    timeout_message = f"Timed out waiting for file {id} to complete after {timeout} seconds"
    return _utils.wait_for(_check_ready, timeout_message, timeout, wait_interval)
[docs]
def create_dataset_file_and_wait(
    *,
    dataset_id: str,
    file_format: models.FileFormat | None = None,
    file_type: models.FileType,
    manifest_type: models.ManifestType | None = None,
    split: models.SplitName | None = None,
    timeout: float = DEFAULT_FILE_WAIT_TIMEOUT,
    wait_interval: float = DEFAULT_FILE_WAIT_INTERVAL,
) -> models.File:
    """Create or retrieve an archive file or manifest file for a dataset and wait for it.

    After requesting the file, the API is polled until the presigned url for the
    dataset file is populated or the timeout is reached. Returns the file object
    with location.

    Note: Creating archive files for datasets is not currently supported and
    will result in an error.

    :param dataset_id: Id of the dataset to create the file for
    :type dataset_id: str
    :param file_format: File format
    :type file_format: Optional[models.FileFormat]
    :param file_type: File type
    :type file_type: models.FileType
    :param manifest_type: Manifest type
    :type manifest_type: Optional[models.ManifestType]
    :param split: Split
    :type split: Optional[models.SplitName]
    :param timeout: Number of seconds to wait for file completion (default 120)
    :type timeout: float
    :param wait_interval: Number of seconds between successive checks of the file presigned url (default 0.5)
    :type wait_interval: float
    :return: File detail for the newly created or existent file
    :rtype: models.File
    :raises TimeoutError: If the timeout has been reached
    """
    # Step 1: request the file (create_dataset_file does the API call).
    requested = create_dataset_file(
        dataset_id=dataset_id,
        file_format=file_format,
        file_type=file_type,
        manifest_type=manifest_type,
        split=split,
    )
    # Step 2: poll by id until the file is complete or the timeout elapses.
    return wait_for_file(requested.id, timeout=timeout, wait_interval=wait_interval)
[docs]
def create_snapshot_file_and_wait(
    *,
    snapshot_id: str,
    file_format: models.FileFormat | None = None,
    file_type: models.FileType,
    manifest_type: models.ManifestType | None = None,
    split: models.SplitName | None = None,
    timeout: float = DEFAULT_FILE_WAIT_TIMEOUT,
    wait_interval: float = DEFAULT_FILE_WAIT_INTERVAL,
) -> models.File:
    """Create or retrieve an archive file or manifest file for a snapshot and wait for it.

    After requesting the file, the API is polled until the presigned url for the
    snapshot file is populated or the timeout is reached. Returns the file object
    with location.

    :param snapshot_id: Id of the snapshot to create the file for
    :type snapshot_id: str
    :param file_format: File format
    :type file_format: Optional[models.FileFormat]
    :param file_type: File type
    :type file_type: models.FileType
    :param manifest_type: Manifest type
    :type manifest_type: Optional[models.ManifestType]
    :param split: Split
    :type split: Optional[models.SplitName]
    :param timeout: Number of seconds to wait for file completion (default 120 seconds)
    :type timeout: float
    :param wait_interval: Number of seconds between successive checks of the file presigned url (default 0.5)
    :type wait_interval: float
    :return: File detail for the newly created or existent file
    :rtype: models.File
    :raises TimeoutError: If the timeout has been reached
    """
    # Request the file first, then poll it by id until it is complete.
    file = create_snapshot_file(
        snapshot_id=snapshot_id,
        file_format=file_format,
        file_type=file_type,
        manifest_type=manifest_type,
        split=split,
    )
    return wait_for_file(file.id, timeout=timeout, wait_interval=wait_interval)