Source code for chariot.datasets.tasks

from collections.abc import Generator
from dataclasses import asdict
from typing import cast

from chariot import _apis
from chariot.datasets import _utils, models
from chariot_api._openapi.datasets_v3 import models as openapi_models
from chariot_api._openapi.datasets_v3.models.model_dataset_filter_config import (
    ModelDatasetFilterConfig,
)
from chariot_api._openapi.datasets_v3.models.model_datum_filter_config import ModelDatumFilterConfig

# Public API of this module: the names exported by a star-import.
__all__ = [
    "get_task",
    "get_tasks",
    "count_tasks",
    "create_task",
    "get_datum_for_task",
    "get_task_datum_count",
    "get_task_activity",
    "count_task_activity",
    "get_tasks_activity",
    "count_tasks_activity",
]


def get_tasks(
    *,
    search: str | None = None,
    exact_name_match: bool | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
    sort: models.TaskSortColumn | None = None,
    direction: models.SortDirection | None = None,
    max_items: int | None = None,
) -> Generator[models.DatumTask, None, None]:
    """Get datum annotation tasks that match various criteria.

    Returns a generator over all matching tasks. When ``task_ids`` is given the
    tasks are fetched through the unpaginated task generator rather than the
    paginator; ``max_items`` is honoured on both paths.

    :param search: Search string (full text search against name and description fields)
    :type search: Optional[str]
    :param exact_name_match: Require search to exactly match the task name (defaults to false)
    :type exact_name_match: Optional[bool]
    :param project_ids: Filter by project ids
    :type project_ids: Optional[List[str]]
    :param task_ids: Filter by task ids
    :type task_ids: Optional[List[str]]
    :param sort: What column to sort the tasks by (defaults to name)
    :type sort: Optional[models.TaskSortColumn]
    :param direction: Whether to sort in ascending or descending order
    :type direction: Optional[models.SortDirection]
    :param max_items: Limit the returned generator to only produce this many items
    :type max_items: Optional[int]
    :return: Task definitions for tasks matching the criteria
    :rtype: Generator[models.DatumTask, None, None]
    """
    from itertools import islice

    # Spell the filters out instead of snapshotting locals(): every name bound
    # in this function (such as the import above) would otherwise leak into
    # the request parameters.
    filters = {
        "search": search,
        "exact_name_match": exact_name_match,
        "project_ids": project_ids,
        "task_ids": task_ids,
        "sort": sort,
        "direction": direction,
    }

    if task_ids is None:
        return _utils.paginate_items(_get_tasks, filters, max_items)

    tasks = _get_task_generator(**filters)
    if max_items is None:
        return tasks

    # The explicit-id path bypasses the paginator, so the limit has to be
    # applied here. A negative limit is clamped to zero rather than letting
    # islice raise ValueError. The generator expression keeps the return value
    # a real generator object, as the annotation promises.
    return (task for task in islice(tasks, max(max_items, 0)))
def _get_task_generator(**params) -> Generator[models.DatumTask, None, None]:
    """Lazily yield the tasks returned by one ``_get_tasks(**params)`` call."""
    for task in _get_tasks(**params):
        yield task


def _get_tasks(
    *,
    search: str | None = None,
    exact_name_match: bool | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
    sort: models.TaskSortColumn | None = None,
    direction: models.SortDirection | None = None,
    limit: int | None = None,
    offset: int | None = None,
) -> list[models.DatumTask]:
    """Issue one ``get_datum_tasks`` request and convert the result.

    ``sort`` and ``direction`` are passed through ``_utils.enum_value`` before
    being sent; every other argument is forwarded unchanged.

    :return: The returned tasks as ``models.DatumTask`` dataclasses; an empty
        list when the response carries no data
    :rtype: List[models.DatumTask]
    """
    request = openapi_models.InputGetDatumTasksRequest(
        search=search,
        exact_name_match=exact_name_match,
        project_ids=project_ids,
        task_ids=task_ids,
        direction=_utils.enum_value(direction),
        sort=_utils.enum_value(sort),
        limit=limit,
        offset=offset,
    )
    response = _apis.datasets_v3.tasks_api.get_datum_tasks(request)

    raw_tasks = response.data or []
    return [
        _utils.convert_to_dataclass(raw_task.model_dump(), models.DatumTask)
        for raw_task in raw_tasks
    ]
def count_tasks(
    *,
    search: str | None = None,
    exact_name_match: bool | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
) -> int:
    """Get number of tasks that match given criteria.

    :param search: Search string (full text search against name and description fields)
    :type search: Optional[str]
    :param exact_name_match: Require search to exactly match the task name (defaults to false)
    :type exact_name_match: Optional[bool]
    :param project_ids: Filter by project ids
    :type project_ids: Optional[List[str]]
    :param task_ids: Filter by task ids
    :type task_ids: Optional[List[str]]
    :return: Number of tasks that match given criteria (0 when the response has no data)
    :rtype: int
    """
    request = openapi_models.InputCountDatumTasksRequest(
        search=search,
        exact_name_match=exact_name_match,
        project_ids=project_ids,
        task_ids=task_ids,
    )
    task_count = _apis.datasets_v3.tasks_api.count_datum_tasks(request).data
    # A missing count is reported as zero.
    return task_count or 0
def create_task(
    *,
    name: str,
    project_id: str,
    description: str | None = None,
    dataset_config: models.DatasetConfig | None = None,
    datum_config: models.DatumConfig | None = None,
) -> models.DatumTask:
    """Create a new datum annotation task.

    :param name: Datum annotation task name
    :type name: str
    :param project_id: Project id that datum annotation Task belongs to
    :type project_id: str
    :param description: Datum annotation task description
    :type description: Optional[str]
    :param dataset_config: Optional dataset configuration; converted to a dict
        and sent as the request's ``dataset_config``
    :type dataset_config: Optional[models.DatasetConfig]
    :param datum_config: Optional datum configuration; converted to a dict and
        sent as the request's ``datum_config``
    :type datum_config: Optional[models.DatumConfig]
    :raises RuntimeError: If the response carries no ``data``
    :return: New datum annotation task detail
    :rtype: models.DatumTask
    """
    # Dataclass configs are flattened to plain dicts before being handed to the
    # request model; an absent config stays None.
    datum_filter = None
    if datum_config:
        datum_filter = asdict(datum_config, dict_factory=_utils.dict_factory)

    dataset_filter = None
    if dataset_config:
        dataset_filter = asdict(dataset_config, dict_factory=_utils.dict_factory)

    request = openapi_models.InputCreateDatumTaskRequest(
        name=name,
        project_id=project_id,
        description=description,
        # cast() only informs the type checker; the values remain dicts (or None).
        dataset_config=cast(ModelDatasetFilterConfig, dataset_filter),
        datum_config=cast(ModelDatumFilterConfig, datum_filter),
    )
    response = _apis.datasets_v3.tasks_api.create_datum_task(request)

    created = response.data
    if not created:
        raise RuntimeError("Received malformed response (missing `data`) from create_datum_task")
    return _utils.convert_to_dataclass(created.model_dump(), models.DatumTask)
def get_task(id: str) -> models.DatumTaskDetails:
    """Get a datum annotation task by id.

    :param id: Datum annotation task id
    :type id: str
    :raises RuntimeError: If the response carries no ``data``
    :return: The datum annotation task details
    :rtype: models.DatumTaskDetails
    """
    task_data = _apis.datasets_v3.tasks_api.get_datum_task(task_id=id).data
    if task_data:
        return _utils.convert_to_dataclass(task_data.model_dump(), models.DatumTaskDetails)
    raise RuntimeError("Received malformed response (missing `data`) from get_datum_task")
def get_datum_for_task(
    task_id: str,
    *,
    unannotated: bool = False,
    random: bool = False,
    id_after: str | None = None,
    prev_datum_id: str | None = None,
) -> models.Datum | None:
    """Get the next available datum for the given task.

    Returns None if there are no datums available.

    :param task_id: The id of the task
    :type task_id: str
    :param unannotated: If true, only unannotated datums will be returned (defaults to false)
    :type unannotated: Optional[bool]
    :param random: If true, returns a random available datum instead of the next
        available datum (defaults to false)
    :type random: Optional[bool]
    :param id_after: If provided, will return a datum that is after the given datum id
        (can be used to resume a task from a specific point, or to skip a specific datum)
    :type id_after: Optional[str]
    :param prev_datum_id: If specified, any lock held by the user on this datum will be
        released if a new datum is acquired
    :type prev_datum_id: Optional[str]
    :return: The datum, or None if no datums matching the request are available
    :rtype: Optional[models.Datum]
    """
    # The API takes a sort string; an empty string is sent when random
    # selection is not requested.
    if random:
        sort_mode = "random"
    else:
        sort_mode = ""

    response = _apis.datasets_v3.datums_api.get_next_datum_for_task(
        task_id=task_id,
        unannotated=unannotated,
        sort=sort_mode,
        id_after=id_after,
        prev_datum_id=prev_datum_id,
    )

    datums = response.data
    if not datums:
        # Covers both a missing payload and an empty result list.
        return None
    return _utils.convert_to_dataclass(datums[0].model_dump(), models.Datum)
def get_task_datum_count(
    task_id: str,
) -> int:
    """Get the number of datums in the provided task.

    :param task_id: The id of the task
    :type task_id: str
    :return: The datum count (0 when the response has no data)
    :rtype: int
    """
    datum_count = _apis.datasets_v3.datums_api.get_task_datum_count(task_id=task_id).data
    return datum_count or 0
def get_task_activity(
    task_id: str,
    *,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
    direction: models.SortDirection | None = None,
    sort: models.TaskActivitySortColumn | None = None,
    max_items: int | None = None,
) -> Generator[models.DatumTaskActivity, None, None]:
    """Get the activities for the provided task and filters.

    :param task_id: Id of the task
    :type task_id: str
    :param activities: List of activity types to filter by
    :type activities: Optional[List[models.DatumTaskActivityCode]]
    :param dataset_ids: List of dataset ids to filter by
    :type dataset_ids: Optional[List[str]]
    :param user_ids: List of user ids to filter by
    :type user_ids: Optional[List[str]]
    :param direction: Sort direction
    :type direction: Optional[models.SortDirection]
    :param sort: Sort column
    :type sort: Optional[models.TaskActivitySortColumn]
    :param max_items: Limit the returned generator to only produce this many items
    :type max_items: Optional[int]
    :return: Generator over the matching task activities
    :rtype: Generator[models.DatumTaskActivity, None, None]
    """
    # Every argument except max_items is forwarded to the page fetcher;
    # max_items is handed to the paginator separately.
    query = {
        "task_id": task_id,
        "activities": activities,
        "dataset_ids": dataset_ids,
        "user_ids": user_ids,
        "direction": direction,
        "sort": sort,
    }
    return _utils.paginate_items(_get_task_activity, query, max_items)
def _get_task_activity(
    task_id: str,
    *,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
    direction: models.SortDirection | None = None,
    limit: int | None = None,
    offset: int | None = None,
    sort: models.TaskActivitySortColumn | None = None,
) -> list[models.DatumTaskActivity]:
    """Issue one ``get_datum_task_activity`` request and convert the result.

    ``activities``, ``direction`` and ``sort`` are passed through
    ``_utils.enum_value`` before being sent; an empty or missing ``activities``
    list is sent as None.

    :return: The returned activities as ``models.DatumTaskActivity``
        dataclasses; an empty list when the response carries no data
    :rtype: List[models.DatumTaskActivity]
    """
    activity_codes = None
    if activities:
        activity_codes = [_utils.enum_value(code) for code in activities]

    response = _apis.datasets_v3.tasks_api.get_datum_task_activity(
        task_id=task_id,
        activities=activity_codes,
        dataset_ids=dataset_ids,
        user_ids=user_ids,
        direction=_utils.enum_value(direction),
        limit=limit,
        offset=offset,
        sort=_utils.enum_value(sort),
    )

    records = response.data
    if not records:
        return []
    return [
        _utils.convert_to_dataclass(record.model_dump(), models.DatumTaskActivity)
        for record in records
    ]
def count_task_activity(
    task_id: str,
    *,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
) -> int:
    """Count the activities for the provided task and filters.

    :param task_id: Id of the task
    :type task_id: str
    :param activities: List of activity types to filter by
    :type activities: Optional[List[models.DatumTaskActivityCode]]
    :param dataset_ids: List of dataset ids to filter by
    :type dataset_ids: Optional[List[str]]
    :param user_ids: List of user ids to filter by
    :type user_ids: Optional[List[str]]
    :return: Number of matching task activities (0 when the response has no data)
    :rtype: int
    """
    # An empty or missing activity filter is sent as None.
    activity_codes = None
    if activities:
        activity_codes = [_utils.enum_value(code) for code in activities]

    response = _apis.datasets_v3.tasks_api.count_datum_task_activity(
        task_id=task_id,
        activities=activity_codes,
        dataset_ids=dataset_ids,
        user_ids=user_ids,
    )
    activity_count = response.data
    return activity_count or 0
def get_tasks_activity(
    exact_name_match: bool | None = None,
    search: str | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
    direction: models.SortDirection | None = None,
    sort: models.TaskActivitySortColumn | None = None,
    max_items: int | None = None,
) -> Generator[models.DatumTaskActivity, None, None]:
    """Get the matching activities.

    :param exact_name_match: Require search filter to match exactly (defaults to false)
    :type exact_name_match: Optional[bool]
    :param search: Search string (full text search against task name and description fields)
    :type search: Optional[str]
    :param project_ids: List of project ids to filter by
    :type project_ids: Optional[List[str]]
    :param task_ids: List of task ids to filter by
    :type task_ids: Optional[List[str]]
    :param activities: List of activity types to filter by
    :type activities: Optional[List[models.DatumTaskActivityCode]]
    :param dataset_ids: List of dataset ids to filter by
    :type dataset_ids: Optional[List[str]]
    :param user_ids: List of user ids to filter by
    :type user_ids: Optional[List[str]]
    :param direction: Sort direction
    :type direction: Optional[models.SortDirection]
    :param sort: Sort column
    :type sort: Optional[models.TaskActivitySortColumn]
    :param max_items: Limit the returned generator to only produce this many items
    :type max_items: Optional[int]
    :return: Generator over the matching task activities
    :rtype: Generator[models.DatumTaskActivity, None, None]
    """
    # Every argument except max_items is forwarded to the page fetcher;
    # max_items is handed to the paginator separately.
    query = {
        "exact_name_match": exact_name_match,
        "search": search,
        "project_ids": project_ids,
        "task_ids": task_ids,
        "activities": activities,
        "dataset_ids": dataset_ids,
        "user_ids": user_ids,
        "direction": direction,
        "sort": sort,
    }
    return _utils.paginate_items(_get_tasks_activity, query, max_items)
def _get_tasks_activity(
    exact_name_match: bool | None = None,
    search: str | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
    direction: models.SortDirection | None = None,
    limit: int | None = None,
    offset: int | None = None,
    sort: models.TaskActivitySortColumn | None = None,
) -> list[models.DatumTaskActivity]:
    """Issue one ``get_datum_tasks_activity`` request and convert the result.

    ``activities``, ``direction`` and ``sort`` are passed through
    ``_utils.enum_value`` before being sent; an empty or missing ``activities``
    list is sent as None.

    :return: The returned activities as ``models.DatumTaskActivity``
        dataclasses; an empty list when the response carries no data
    :rtype: List[models.DatumTaskActivity]
    """
    activity_codes = None
    if activities:
        activity_codes = [_utils.enum_value(code) for code in activities]

    response = _apis.datasets_v3.tasks_api.get_datum_tasks_activity(
        exact_name_match=exact_name_match,
        search=search,
        project_ids=project_ids,
        task_ids=task_ids,
        activities=activity_codes,
        dataset_ids=dataset_ids,
        user_ids=user_ids,
        direction=_utils.enum_value(direction),
        limit=limit,
        offset=offset,
        sort=_utils.enum_value(sort),
    )

    records = response.data
    if not records:
        return []
    return [
        _utils.convert_to_dataclass(record.model_dump(), models.DatumTaskActivity)
        for record in records
    ]
def count_tasks_activity(
    exact_name_match: bool | None = None,
    search: str | None = None,
    project_ids: list[str] | None = None,
    task_ids: list[str] | None = None,
    activities: list[models.DatumTaskActivityCode] | None = None,
    dataset_ids: list[str] | None = None,
    user_ids: list[str] | None = None,
) -> int:
    """Count matching activities.

    :param exact_name_match: Require search filter to match exactly (defaults to false)
    :type exact_name_match: Optional[bool]
    :param search: Search string (full text search against task name and description fields)
    :type search: Optional[str]
    :param project_ids: List of project ids to filter by
    :type project_ids: Optional[List[str]]
    :param task_ids: List of task ids to filter by
    :type task_ids: Optional[List[str]]
    :param activities: List of activity types to filter by
    :type activities: Optional[List[models.DatumTaskActivityCode]]
    :param dataset_ids: List of dataset ids to filter by
    :type dataset_ids: Optional[List[str]]
    :param user_ids: List of user ids to filter by
    :type user_ids: Optional[List[str]]
    :return: Number of matching task activities (0 when the response has no data)
    :rtype: int
    """
    # An empty or missing activity filter is sent as None.
    activity_codes = None
    if activities:
        activity_codes = [_utils.enum_value(code) for code in activities]

    response = _apis.datasets_v3.tasks_api.count_datum_tasks_activity(
        exact_name_match=exact_name_match,
        search=search,
        project_ids=project_ids,
        task_ids=task_ids,
        activities=activity_codes,
        dataset_ids=dataset_ids,
        user_ids=user_ids,
    )
    activity_count = response.data
    return activity_count or 0