from collections.abc import Generator
from dataclasses import asdict
from typing import cast
from chariot import _apis
from chariot.datasets import _utils, models
from chariot_api._openapi.datasets_v3 import models as openapi_models
from chariot_api._openapi.datasets_v3.models.model_dataset_filter_config import (
ModelDatasetFilterConfig,
)
from chariot_api._openapi.datasets_v3.models.model_datum_filter_config import ModelDatumFilterConfig
# Public API of this module: the names exported by a star-import. Helpers whose
# names start with an underscore (e.g. ``_get_tasks``) are intentionally omitted.
__all__ = [
"get_task",
"get_tasks",
"count_tasks",
"create_task",
"get_datum_for_task",
"get_task_datum_count",
"get_task_activity",
"count_task_activity",
"get_tasks_activity",
"count_tasks_activity",
]
[docs]
def get_tasks(
    *,
    search: str | None = None,
    exact_name_match: bool | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
    sort: models.TaskSortColumn | None = None,
    direction: models.SortDirection | None = None,
    max_items: int | None = None,
) -> Generator[models.DatumTask, None, None]:
    """Get datum annotation tasks that match various criteria.

    Returns a generator over all matching tasks. When ``task_ids`` is given, the
    tasks come from a single ``_get_tasks`` call made without ``limit``/``offset``;
    otherwise iteration is delegated to ``_utils.paginate_items``. In both cases
    ``max_items`` caps the number of tasks the generator produces.

    :param search: Search string (full text search against name and description fields)
    :type search: Optional[str]
    :param exact_name_match: Require search to exactly match the task name (defaults to false)
    :type exact_name_match: Optional[bool]
    :param project_ids: Filter by project ids
    :type project_ids: Optional[List[str]]
    :param task_ids: Filter by task ids
    :type task_ids: Optional[List[str]]
    :param sort: What column to sort the tasks by (defaults to name)
    :type sort: Optional[models.TaskSortColumn]
    :param direction: Whether to sort in ascending or descending order
    :type direction: Optional[models.SortDirection]
    :param max_items: Limit the returned generator to only produce this many items
    :type max_items: Optional[int]
    :return: Task definitions for tasks matching the criteria
    :rtype: Generator[models.DatumTask, None, None]
    """
    # Imported locally so this function does not depend on a module-level import.
    from itertools import islice

    # Spell the filters out explicitly instead of capturing ``locals()``, so that
    # adding a local variable can never leak into the request parameters.
    filters = {
        "search": search,
        "exact_name_match": exact_name_match,
        "project_ids": project_ids,
        "task_ids": task_ids,
        "sort": sort,
        "direction": direction,
    }
    if task_ids is None:
        return _utils.paginate_items(_get_tasks, filters, max_items)

    tasks = _get_task_generator(**filters)
    if max_items is None:
        return tasks
    # Previously ``max_items`` was ignored on this path. ``islice`` rejects negative
    # stops, so clamp to zero (a non-positive cap yields no tasks). The generator
    # expression keeps the return value a real generator, matching the annotation.
    return (task for task in islice(tasks, max(max_items, 0)))
def _get_task_generator(**params) -> Generator[models.DatumTask, None, None]:
    """Lazily yield the tasks returned by one ``_get_tasks`` call.

    The request is only issued once the generator is first advanced.

    :param params: Keyword arguments forwarded unchanged to ``_get_tasks``
    :return: Generator over the tasks returned by that call
    :rtype: Generator[models.DatumTask, None, None]
    """
    fetched_tasks = _get_tasks(**params)
    yield from fetched_tasks
def _get_tasks(
    *,
    search: str | None = None,
    exact_name_match: bool | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
    sort: models.TaskSortColumn | None = None,
    direction: models.SortDirection | None = None,
    limit: int | None = None,
    offset: int | None = None,
) -> list[models.DatumTask]:
    """Issue one ``get_datum_tasks`` request and convert the result to dataclasses.

    :param search: Search string forwarded to the request
    :param exact_name_match: Exact-name-match flag forwarded to the request
    :param project_ids: Project ids forwarded to the request
    :param task_ids: Task ids forwarded to the request
    :param sort: Sort column; converted with ``_utils.enum_value`` before sending
    :param direction: Sort direction; converted with ``_utils.enum_value`` before sending
    :param limit: ``limit`` value forwarded to the request
    :param offset: ``offset`` value forwarded to the request
    :return: The tasks in the response, or an empty list when the response has no data
    :rtype: list[models.DatumTask]
    """
    fetch_tasks = _apis.datasets_v3.tasks_api.get_datum_tasks
    request = openapi_models.InputGetDatumTasksRequest(
        search=search,
        exact_name_match=exact_name_match,
        project_ids=project_ids,
        task_ids=task_ids,
        direction=_utils.enum_value(direction),
        sort=_utils.enum_value(sort),
        limit=limit,
        offset=offset,
    )
    response = fetch_tasks(request)

    tasks: list[models.DatumTask] = []
    # A missing/empty ``data`` payload is treated as "no tasks".
    for raw_task in response.data or []:
        tasks.append(_utils.convert_to_dataclass(raw_task.model_dump(), models.DatumTask))
    return tasks
[docs]
def count_tasks(
    *,
    search: str | None = None,
    exact_name_match: bool | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
) -> int:
    """Get the number of tasks that match the given criteria.

    :param search: Search string (full text search against name and description fields)
    :type search: Optional[str]
    :param exact_name_match: Require search to exactly match the task name (defaults to false)
    :type exact_name_match: Optional[bool]
    :param project_ids: Filter by project ids
    :type project_ids: Optional[List[str]]
    :param task_ids: Filter by task ids
    :type task_ids: Optional[List[str]]
    :return: Number of tasks that match the given criteria (0 when the response has no data)
    :rtype: int
    """
    count_endpoint = _apis.datasets_v3.tasks_api.count_datum_tasks
    request = openapi_models.InputCountDatumTasksRequest(
        search=search,
        exact_name_match=exact_name_match,
        project_ids=project_ids,
        task_ids=task_ids,
    )
    total = count_endpoint(request).data
    # A missing count is reported as zero.
    return total if total else 0
[docs]
def create_task(
    *,
    name: str,
    project_id: str,
    description: str | None = None,
    dataset_config: models.DatasetConfig | None = None,
    datum_config: models.DatumConfig | None = None,
) -> models.DatumTask:
    """Create a new datum annotation task.

    :param name: Datum annotation task name
    :type name: str
    :param project_id: Project id that the datum annotation task belongs to
    :type project_id: str
    :param description: Datum annotation task description
    :type description: Optional[str]
    :param dataset_config: Optional dataset configuration; converted to a dict and sent
        as the request's ``dataset_config``
    :type dataset_config: Optional[models.DatasetConfig]
    :param datum_config: Optional datum configuration; converted to a dict and sent as
        the request's ``datum_config``
    :type datum_config: Optional[models.DatumConfig]
    :return: New datum annotation task detail
    :rtype: models.DatumTask
    :raises RuntimeError: If the response carries no ``data``
    """
    # Dataclass configs are flattened to plain dicts before being put on the request.
    datum_config_dict = None
    if datum_config:
        datum_config_dict = asdict(datum_config, dict_factory=_utils.dict_factory)
    dataset_config_dict = None
    if dataset_config:
        dataset_config_dict = asdict(dataset_config, dict_factory=_utils.dict_factory)

    create = _apis.datasets_v3.tasks_api.create_datum_task
    request = openapi_models.InputCreateDatumTaskRequest(
        name=name,
        project_id=project_id,
        description=description,
        # ``cast`` only satisfies the type checker; the values stay plain dicts (or None).
        dataset_config=cast(ModelDatasetFilterConfig, dataset_config_dict),
        datum_config=cast(ModelDatumFilterConfig, datum_config_dict),
    )
    created = create(request).data
    if created:
        return _utils.convert_to_dataclass(created.model_dump(), models.DatumTask)
    raise RuntimeError("Received malformed response (missing `data`) from create_datum_task")
[docs]
def get_task(id: str) -> models.DatumTaskDetails:
    """Get a datum annotation task by id.

    :param id: Datum annotation task id
    :type id: str
    :return: The datum annotation task details
    :rtype: models.DatumTaskDetails
    :raises RuntimeError: If the response carries no ``data``
    """
    details = _apis.datasets_v3.tasks_api.get_datum_task(task_id=id).data
    if details:
        return _utils.convert_to_dataclass(details.model_dump(), models.DatumTaskDetails)
    raise RuntimeError("Received malformed response (missing `data`) from get_datum_task")
[docs]
def get_datum_for_task(
    task_id: str,
    *,
    unannotated: bool = False,
    random: bool = False,
    id_after: str | None = None,
    prev_datum_id: str | None = None,
) -> models.Datum | None:
    """Get the next available datum for the given task.

    Returns None if there are no datums available.

    :param task_id: The id of the task
    :type task_id: str
    :param unannotated: If true, only unannotated datums will be returned (defaults to false)
    :type unannotated: bool
    :param random: If true, returns a random available datum instead of the next available
        datum (defaults to false)
    :type random: bool
    :param id_after: If provided, will return a datum that is after the given datum id (can be
        used to resume a task from a specific point, or to skip a specific datum)
    :type id_after: Optional[str]
    :param prev_datum_id: If specified, any lock held by the user on this datum will be released
        if a new datum is acquired
    :type prev_datum_id: Optional[str]
    :return: The datum, or None if no datums matching the request are available
    :rtype: Optional[models.Datum]
    """
    # The endpoint takes a sort string; an empty string is sent when ``random`` is false.
    if random:
        sort_mode = "random"
    else:
        sort_mode = ""

    response = _apis.datasets_v3.datums_api.get_next_datum_for_task(
        task_id=task_id,
        unannotated=unannotated,
        sort=sort_mode,
        id_after=id_after,
        prev_datum_id=prev_datum_id,
    )
    candidates = response.data
    # Covers both a missing payload and an empty list.
    if not candidates:
        return None
    return _utils.convert_to_dataclass(candidates[0].model_dump(), models.Datum)
[docs]
def get_task_datum_count(
    task_id: str,
) -> int:
    """Get the number of datums in the provided task.

    :param task_id: The id of the task
    :type task_id: str
    :return: The datum count (0 when the response has no data)
    :rtype: int
    """
    datum_count = _apis.datasets_v3.datums_api.get_task_datum_count(task_id=task_id).data
    return datum_count if datum_count else 0
[docs]
def get_task_activity(
    task_id: str,
    *,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
    direction: models.SortDirection | None = None,
    sort: models.TaskActivitySortColumn | None = None,
    max_items: int | None = None,
) -> Generator[models.DatumTaskActivity, None, None]:
    """Get the activities for the provided task and filters.

    :param task_id: Id of the task
    :type task_id: str
    :param activities: List of activity types to filter by
    :type activities: Optional[List[models.DatumTaskActivityCode]]
    :param dataset_ids: List of dataset ids to filter by
    :type dataset_ids: Optional[List[str]]
    :param user_ids: List of user ids to filter by
    :type user_ids: Optional[List[str]]
    :param direction: Sort direction
    :type direction: Optional[models.SortDirection]
    :param sort: Sort column
    :type sort: Optional[models.TaskActivitySortColumn]
    :param max_items: Limit the returned generator to only produce this many items
    :type max_items: Optional[int]
    :return: Generator over the matching task activities
    :rtype: Generator[models.DatumTaskActivity, None, None]
    """
    # Everything except ``max_items`` is forwarded to ``_get_task_activity``.
    query = {
        "task_id": task_id,
        "activities": activities,
        "dataset_ids": dataset_ids,
        "user_ids": user_ids,
        "direction": direction,
        "sort": sort,
    }
    return _utils.paginate_items(_get_task_activity, query, max_items)
def _get_task_activity(
    task_id: str,
    *,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
    direction: models.SortDirection | None = None,
    limit: int | None = None,
    offset: int | None = None,
    sort: models.TaskActivitySortColumn | None = None,
) -> list[models.DatumTaskActivity]:
    """Issue one ``get_datum_task_activity`` request and convert the result to dataclasses.

    :param task_id: Id of the task
    :param activities: Activity codes; each is converted with ``_utils.enum_value``
        (an empty or missing list is sent as None)
    :param dataset_ids: Dataset ids forwarded to the request
    :param user_ids: User ids forwarded to the request
    :param direction: Sort direction; converted with ``_utils.enum_value``
    :param limit: ``limit`` value forwarded to the request
    :param offset: ``offset`` value forwarded to the request
    :param sort: Sort column; converted with ``_utils.enum_value``
    :return: The activities in the response, or an empty list when the response has no data
    :rtype: list[models.DatumTaskActivity]
    """
    fetch_activity = _apis.datasets_v3.tasks_api.get_datum_task_activity
    activity_codes = None
    if activities:
        activity_codes = [_utils.enum_value(code) for code in activities]

    response = fetch_activity(
        task_id=task_id,
        activities=activity_codes,
        dataset_ids=dataset_ids,
        user_ids=user_ids,
        direction=_utils.enum_value(direction),
        limit=limit,
        offset=offset,
        sort=_utils.enum_value(sort),
    )
    records = response.data
    if not records:
        return []

    converted: list[models.DatumTaskActivity] = []
    for record in records:
        converted.append(
            _utils.convert_to_dataclass(record.model_dump(), models.DatumTaskActivity)
        )
    return converted
[docs]
def count_task_activity(
    task_id: str,
    *,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
) -> int:
    """Count the activities for the provided task and filters.

    :param task_id: Id of the task
    :type task_id: str
    :param activities: List of activity types to filter by
    :type activities: Optional[List[models.DatumTaskActivityCode]]
    :param dataset_ids: List of dataset ids to filter by
    :type dataset_ids: Optional[List[str]]
    :param user_ids: List of user ids to filter by
    :type user_ids: Optional[List[str]]
    :return: Number of matching task activities (0 when the response has no data)
    :rtype: int
    """
    count_endpoint = _apis.datasets_v3.tasks_api.count_datum_task_activity
    # An empty or missing activity filter is sent as None.
    activity_codes = None
    if activities:
        activity_codes = [_utils.enum_value(code) for code in activities]

    total = count_endpoint(
        task_id=task_id,
        activities=activity_codes,
        dataset_ids=dataset_ids,
        user_ids=user_ids,
    ).data
    return total if total else 0
[docs]
def get_tasks_activity(
    exact_name_match: bool | None = None,
    search: str | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
    direction: models.SortDirection | None = None,
    sort: models.TaskActivitySortColumn | None = None,
    max_items: int | None = None,
) -> Generator[models.DatumTaskActivity, None, None]:
    """Get the matching activities across tasks.

    :param exact_name_match: Require search filter to match exactly (defaults to false)
    :type exact_name_match: Optional[bool]
    :param search: Search string (full text search against task name and description fields)
    :type search: Optional[str]
    :param project_ids: List of project ids to filter by
    :type project_ids: Optional[List[str]]
    :param task_ids: List of task ids to filter by
    :type task_ids: Optional[List[str]]
    :param activities: List of activity types to filter by
    :type activities: Optional[List[models.DatumTaskActivityCode]]
    :param dataset_ids: List of dataset ids to filter by
    :type dataset_ids: Optional[List[str]]
    :param user_ids: List of user ids to filter by
    :type user_ids: Optional[List[str]]
    :param direction: Sort direction
    :type direction: Optional[models.SortDirection]
    :param sort: Sort column
    :type sort: Optional[models.TaskActivitySortColumn]
    :param max_items: Limit the returned generator to only produce this many items
    :type max_items: Optional[int]
    :return: Generator over the matching task activities
    :rtype: Generator[models.DatumTaskActivity, None, None]
    """
    # Everything except ``max_items`` is forwarded to ``_get_tasks_activity``.
    query = {
        "exact_name_match": exact_name_match,
        "search": search,
        "project_ids": project_ids,
        "task_ids": task_ids,
        "activities": activities,
        "dataset_ids": dataset_ids,
        "user_ids": user_ids,
        "direction": direction,
        "sort": sort,
    }
    return _utils.paginate_items(_get_tasks_activity, query, max_items)
def _get_tasks_activity(
    exact_name_match: bool | None = None,
    search: str | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
    direction: models.SortDirection | None = None,
    limit: int | None = None,
    offset: int | None = None,
    sort: models.TaskActivitySortColumn | None = None,
) -> list[models.DatumTaskActivity]:
    """Issue one ``get_datum_tasks_activity`` request and convert the result to dataclasses.

    :param exact_name_match: Exact-match flag forwarded to the request
    :param search: Search string forwarded to the request
    :param project_ids: Project ids forwarded to the request
    :param task_ids: Task ids forwarded to the request
    :param activities: Activity codes; each is converted with ``_utils.enum_value``
        (an empty or missing list is sent as None)
    :param dataset_ids: Dataset ids forwarded to the request
    :param user_ids: User ids forwarded to the request
    :param direction: Sort direction; converted with ``_utils.enum_value``
    :param limit: ``limit`` value forwarded to the request
    :param offset: ``offset`` value forwarded to the request
    :param sort: Sort column; converted with ``_utils.enum_value``
    :return: The activities in the response, or an empty list when the response has no data
    :rtype: list[models.DatumTaskActivity]
    """
    fetch_activity = _apis.datasets_v3.tasks_api.get_datum_tasks_activity
    activity_codes = None
    if activities:
        activity_codes = [_utils.enum_value(code) for code in activities]

    response = fetch_activity(
        exact_name_match=exact_name_match,
        search=search,
        project_ids=project_ids,
        task_ids=task_ids,
        activities=activity_codes,
        dataset_ids=dataset_ids,
        user_ids=user_ids,
        direction=_utils.enum_value(direction),
        limit=limit,
        offset=offset,
        sort=_utils.enum_value(sort),
    )
    records = response.data
    if not records:
        return []

    converted: list[models.DatumTaskActivity] = []
    for record in records:
        converted.append(
            _utils.convert_to_dataclass(record.model_dump(), models.DatumTaskActivity)
        )
    return converted
[docs]
def count_tasks_activity(
    exact_name_match: bool | None = None,
    search: str | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
) -> int:
    """Count the matching activities across tasks.

    :param exact_name_match: Require search filter to match exactly (defaults to false)
    :type exact_name_match: Optional[bool]
    :param search: Search string (full text search against task name and description fields)
    :type search: Optional[str]
    :param project_ids: List of project ids to filter by
    :type project_ids: Optional[List[str]]
    :param task_ids: List of task ids to filter by
    :type task_ids: Optional[List[str]]
    :param activities: List of activity types to filter by
    :type activities: Optional[List[models.DatumTaskActivityCode]]
    :param dataset_ids: List of dataset ids to filter by
    :type dataset_ids: Optional[List[str]]
    :param user_ids: List of user ids to filter by
    :type user_ids: Optional[List[str]]
    :return: Number of matching task activities (0 when the response has no data)
    :rtype: int
    """
    count_endpoint = _apis.datasets_v3.tasks_api.count_datum_tasks_activity
    # An empty or missing activity filter is sent as None.
    activity_codes = None
    if activities:
        activity_codes = [_utils.enum_value(code) for code in activities]

    total = count_endpoint(
        exact_name_match=exact_name_match,
        search=search,
        project_ids=project_ids,
        task_ids=task_ids,
        activities=activity_codes,
        dataset_ids=dataset_ids,
        user_ids=user_ids,
    ).data
    return total if total else 0