Label Studio SDK 的数据管理模块

类可用于在Project.get_tasks()中筛选、排序和选择项目,并为数据管理器中任务可用的所有列名提供枚举,以及其他辅助功能。

如需执行其他操作,请参阅 client、project 或 utils 模块。

示例:

from datetime import datetime

from label_studio_sdk.data_manager import Filters, Column, Operator, Type

filters = Filters.create(Filters.OR, [
    Filters.item(
        Column.id,
        Operator.GREATER,
        Type.Number,
        Filters.value(42)
    ),
    Filters.item(
        Column.completed_at,
        Operator.IN,
        Type.Datetime,
        Filters.value(
            datetime(2021, 11, 1),
            datetime.now()
        )
    )
])
tasks = project.get_tasks(filters=filters)
source code 浏览Git
""" # Data Manager module for the Label Studio SDK

    The classes in this module can be used to filter, order, and select items in `label_studio_sdk.project.Project.get_tasks`.
    They also provide an enumeration of all column names available in the Data Manager for tasks, and other helpers.

    See the [client](client.html), [project](project.html) or [utils](utils.html) modules for other operations you
    might want to perform.

    Example:

    ```python
    from datetime import datetime

    from label_studio_sdk.data_manager import Filters, Column, Operator, Type

    filters = Filters.create(Filters.OR, [
        Filters.item(
            Column.id,
            Operator.GREATER,
            Type.Number,
            Filters.value(42)
        ),
        Filters.item(
            Column.completed_at,
            Operator.IN,
            Type.Datetime,
            Filters.value(
                datetime(2021, 11, 1),
                datetime.now()
            )
        )
    ])
    tasks = project.get_tasks(filters=filters)
    ```
"""

from datetime import datetime

# strftime pattern used to serialize datetime filter values; the trailing "Z" is a literal UTC marker.
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"


class Filters:
    """
    Use the methods and variables in this class to create and combine filters for tasks on the Label Studio Data Manager.

    All helpers return plain Python data structures (dicts, strings, or the value passed in).
    """

    OR = "or"
    """Combine filters with an OR"""
    AND = "and"
    """Combine filters with an AND"""

    @staticmethod
    def create(conjunction, items):
        """Create a filter for `label_studio_sdk.project.Project.get_tasks()`

        Parameters
        ----------
        conjunction: str
            The conjunction operator between filters ('or' or 'and'), see `Filters.OR` and `Filters.AND`
        items: list
            What to filter, use the `Filters.item()` method to build each entry

        Returns
        -------
        dict
            `{"conjunction": conjunction, "items": items}`

        """
        return {"conjunction": conjunction, "items": items}

    @staticmethod
    def item(name, operator, column_type, value):
        """Use in combination with other classes to specify the contents of a filter.

        Parameters
        ----------
        name: `Column` or str
            Column.id, Column.completed_at, Column.data('my_field'), etc
        operator: `Operator`
            Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc
        column_type: `Type`
            Type.Number, Type.Boolean, Type.String, etc
        value: `Filters.value()`
            Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 1, 1), datetime.now())

        Returns
        -------
        dict
            Filter item with the keys `filter` (the column name prefixed with `'filter:'`),
            `operator`, `type` and `value`
        """
        return {
            "filter": "filter:" + name,
            "operator": operator,
            "type": column_type,
            "value": value,
        }

    @staticmethod
    def datetime(dt):
        """Date time string format for filtering the Data Manager.

        Parameters
        ----------
        dt
            datetime instance. Timezone-aware values are converted to UTC before formatting;
            naive values are formatted as they are.

        Returns
        -------
        str
            datetime in `'%Y-%m-%dT%H:%M:%S.%fZ'` format

        """
        assert isinstance(dt, datetime), "dt must be datetime type"
        # The format ends with a literal "Z" (UTC), so an aware datetime must be
        # shifted to UTC first; otherwise its local wall-clock time would be
        # mislabelled as UTC.
        offset = dt.utcoffset()
        if offset is not None:
            dt = (dt - offset).replace(tzinfo=None)
        return dt.strftime(DATETIME_FORMAT)

    @classmethod
    def value(cls, value, maximum=None):
        """Set a filter value in the Data Manager.

        Parameters
        ----------
        value: str | int | float | datetime | bool
            value to use for filtering. If the maximum parameter is passed, then this value field is the minimum.

        maximum: int | float | datetime
            Specify a maximum for a filtering range with IN, NOT_IN operators.

        Returns
        -------
        any
            `value` itself (datetimes are converted with `Filters.datetime()`), or
            `{"min": value, "max": maximum}` when `maximum` is given

        """
        if isinstance(value, datetime):
            value = cls.datetime(value)

        # Compare against None so that falsy bounds such as 0 still produce a range.
        if maximum is not None:
            if isinstance(maximum, datetime):
                maximum = cls.datetime(maximum)
            return {"min": value, "max": maximum}

        return value


class Operator:
    """Operator names to pass as the `operator` argument of `Filters.item()`."""

    # Equality and ordering comparisons
    EQUAL = "equal"
    NOT_EQUAL = "not_equal"
    LESS = "less"
    LESS_OR_EQUAL = "less_or_equal"
    GREATER = "greater"
    GREATER_OR_EQUAL = "greater_or_equal"

    # Range checks; used with the min/max form of `Filters.value()`
    IN = "in"
    NOT_IN = "not_in"

    # Presumably membership in an explicit list of values - confirm against the Data Manager API
    IN_LIST = "in_list"
    NOT_IN_LIST = "not_in_list"

    # Text matching
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"
    REGEX = "regex"

    # Emptiness check
    EMPTY = "empty"


class Type:
    """Column data types to pass as the `column_type` argument of `Filters.item()`."""

    String = "String"
    Number = "Number"
    Boolean = "Boolean"
    Datetime = "Datetime"
    List = "List"

    Unknown = "Unknown"
    """Unknown is explicitly converted to string format."""


class Column:
    """Names of the Data Manager columns (Label Studio UI) that a filter can target."""

    id = "tasks:id"
    """ID of the task"""
    inner_id = "tasks:inner_id"
    """Inner ID of the task; it starts from 1 for all projects"""
    ground_truth = "tasks:ground_truth"
    """Ground truth status of the tasks"""
    annotations_results = "tasks:annotations_results"
    """Annotation results for the tasks"""
    reviewed = "tasks:reviewed"
    """Whether the tasks have been reviewed (Enterprise only)"""
    predictions_score = "tasks:predictions_score"
    """Prediction score for the task"""
    predictions_model_versions = "tasks:predictions_model_versions"
    """Model version used for the predictions"""
    predictions_results = "tasks:predictions_results"
    """Prediction results for the tasks"""
    file_upload = "tasks:file_upload"
    """Name of the file that was uploaded to create the tasks"""
    created_at = "tasks:created_at"
    """Time at which the task was created"""
    updated_at = "tasks:updated_at"
    """Time at which the task was updated (e.g. a new annotation was created, a review was added, etc)"""
    annotators = "tasks:annotators"
    """Annotators that completed the task (Community). Can include assigned annotators (Enterprise only)"""
    total_predictions = "tasks:total_predictions"
    """Total number of predictions for the task"""
    cancelled_annotations = "tasks:cancelled_annotations"
    """Number of cancelled or skipped annotations for the task"""
    total_annotations = "tasks:total_annotations"
    """Total number of annotations on a task"""
    completed_at = "tasks:completed_at"
    """Time at which a task was fully annotated"""
    agreement = "tasks:agreement"
    """Agreement for annotation results for a specific task (Enterprise only)"""
    reviewers = "tasks:reviewers"
    """Reviewers that reviewed the task, or assigned reviewers (Enterprise only)"""
    reviews_rejected = "tasks:reviews_rejected"
    """Number of annotations rejected for a task in review (Enterprise only)"""
    reviews_accepted = "tasks:reviews_accepted"
    """Number of annotations accepted for a task in review (Enterprise only)"""
    comments = "tasks:comments"
    """Number of comments in a task"""
    unresolved_comment_count = "tasks:unresolved_comment_count"
    """Number of unresolved comments in a task"""

    @staticmethod
    def data(task_field):
        """Build the column name that addresses a field of the task data.

        Parameters
        ----------
        task_field: str
            Name of the task data field, e.g. `'my_field'`

        Returns
        -------
        str
            `task_field` prefixed with `'tasks:data.'`

        """
        prefix = "tasks:data."
        return prefix + task_field


def _test():
    """Check that the helper classes assemble the expected filter payload."""
    id_item = Filters.item(Column.id, Operator.GREATER, Type.Number, Filters.value(42))
    completed_range = Filters.value(
        datetime(2021, 11, 1),
        datetime(2021, 11, 5),
    )
    completed_item = Filters.item(
        Column.completed_at, Operator.IN, Type.Datetime, completed_range
    )
    filters = Filters.create(Filters.OR, [id_item, completed_item])

    expected_id_item = {
        "filter": "filter:tasks:id",
        "operator": "greater",
        "type": "Number",
        "value": 42,
    }
    expected_completed_item = {
        "filter": "filter:tasks:completed_at",
        "operator": "in",
        "type": "Datetime",
        "value": {
            "min": "2021-11-01T00:00:00.000000Z",
            "max": "2021-11-05T00:00:00.000000Z",
        },
    }
    expected = {
        "conjunction": "or",
        "items": [expected_id_item, expected_completed_item],
    }

    assert filters == expected

class Column

指定Label Studio UI中数据管理器上用于筛选的列。

source code 浏览Git
class Column:
    """Names of the Data Manager columns (Label Studio UI) that a filter can target."""

    id = "tasks:id"
    """ID of the task"""
    inner_id = "tasks:inner_id"
    """Inner ID of the task; it starts from 1 for all projects"""
    ground_truth = "tasks:ground_truth"
    """Ground truth status of the tasks"""
    annotations_results = "tasks:annotations_results"
    """Annotation results for the tasks"""
    reviewed = "tasks:reviewed"
    """Whether the tasks have been reviewed (Enterprise only)"""
    predictions_score = "tasks:predictions_score"
    """Prediction score for the task"""
    predictions_model_versions = "tasks:predictions_model_versions"
    """Model version used for the predictions"""
    predictions_results = "tasks:predictions_results"
    """Prediction results for the tasks"""
    file_upload = "tasks:file_upload"
    """Name of the file that was uploaded to create the tasks"""
    created_at = "tasks:created_at"
    """Time at which the task was created"""
    updated_at = "tasks:updated_at"
    """Time at which the task was updated (e.g. a new annotation was created, a review was added, etc)"""
    annotators = "tasks:annotators"
    """Annotators that completed the task (Community). Can include assigned annotators (Enterprise only)"""
    total_predictions = "tasks:total_predictions"
    """Total number of predictions for the task"""
    cancelled_annotations = "tasks:cancelled_annotations"
    """Number of cancelled or skipped annotations for the task"""
    total_annotations = "tasks:total_annotations"
    """Total number of annotations on a task"""
    completed_at = "tasks:completed_at"
    """Time at which a task was fully annotated"""
    agreement = "tasks:agreement"
    """Agreement for annotation results for a specific task (Enterprise only)"""
    reviewers = "tasks:reviewers"
    """Reviewers that reviewed the task, or assigned reviewers (Enterprise only)"""
    reviews_rejected = "tasks:reviews_rejected"
    """Number of annotations rejected for a task in review (Enterprise only)"""
    reviews_accepted = "tasks:reviews_accepted"
    """Number of annotations accepted for a task in review (Enterprise only)"""
    comments = "tasks:comments"
    """Number of comments in a task"""
    unresolved_comment_count = "tasks:unresolved_comment_count"
    """Number of unresolved comments in a task"""

    @staticmethod
    def data(task_field):
        """Build the column name that addresses a field of the task data.

        Parameters
        ----------
        task_field: str
            Name of the task data field, e.g. `'my_field'`

        Returns
        -------
        str
            `task_field` prefixed with `'tasks:data.'`

        """
        prefix = "tasks:data."
        return prefix + task_field

常量

agreement

特定任务标注结果的一致性(仅限企业版)

annotations_results

任务的标注结果

annotators

已完成任务的标注人员(社区版)。可包含已分配的标注人员(仅限企业版)

cancelled_annotations

任务中被取消或跳过的标注数量

comments

任务中的评论数量

completed_at

任务被完全标注的时间

created_at

任务创建时间

file_upload

上传用于创建任务的文件名称

ground_truth

任务的基础真值状态

id

任务ID

inner_id

任务内部ID,所有项目都从1开始计数

predictions_model_versions

用于预测的模型版本

predictions_results

任务的预测结果

predictions_score

任务的预测分数

reviewed

任务是否已审核(仅限企业版)

reviewers

已审核该任务的审核者,或已分配的审核者(仅限企业版)

reviews_accepted

任务审核中接受的标注数量(仅限企业版)

reviews_rejected

在审核中被拒绝的任务标注数量(仅限企业版)

total_annotations

任务上的标注总数

total_predictions

任务的总预测数量

unresolved_comment_count

任务中未解决的评论数量

updated_at

任务更新时间(例如创建了新标注、添加了评审等)

静态方法

def data(task_field)

为任务数据字段创建筛选器名称

参数

task_field
 

返回

str
Filter name for task data
source code 浏览Git
@staticmethod
def data(task_field):
    """Build the column name that addresses a field of the task data.

    Parameters
    ----------
    task_field: str
        Name of the task data field, e.g. `'my_field'`

    Returns
    -------
    str
        `task_field` prefixed with `'tasks:data.'`

    """
    prefix = "tasks:data."
    return prefix + task_field
class Filters

使用此类中的方法和变量为Label Studio数据管理器中的任务创建和组合过滤器。

source code 浏览Git
class Filters:
    """
    Use the methods and variables in this class to create and combine filters for tasks on the Label Studio Data Manager.

    All helpers return plain Python data structures (dicts, strings, or the value passed in).
    """

    OR = "or"
    """Combine filters with an OR"""
    AND = "and"
    """Combine filters with an AND"""

    @staticmethod
    def create(conjunction, items):
        """Create a filter for `label_studio_sdk.project.Project.get_tasks()`

        Parameters
        ----------
        conjunction: str
            The conjunction operator between filters ('or' or 'and'), see `Filters.OR` and `Filters.AND`
        items: list
            What to filter, use the `Filters.item()` method to build each entry

        Returns
        -------
        dict
            `{"conjunction": conjunction, "items": items}`

        """
        return {"conjunction": conjunction, "items": items}

    @staticmethod
    def item(name, operator, column_type, value):
        """Use in combination with other classes to specify the contents of a filter.

        Parameters
        ----------
        name: `Column` or str
            Column.id, Column.completed_at, Column.data('my_field'), etc
        operator: `Operator`
            Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc
        column_type: `Type`
            Type.Number, Type.Boolean, Type.String, etc
        value: `Filters.value()`
            Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 1, 1), datetime.now())

        Returns
        -------
        dict
            Filter item with the keys `filter` (the column name prefixed with `'filter:'`),
            `operator`, `type` and `value`
        """
        return {
            "filter": "filter:" + name,
            "operator": operator,
            "type": column_type,
            "value": value,
        }

    @staticmethod
    def datetime(dt):
        """Date time string format for filtering the Data Manager.

        Parameters
        ----------
        dt
            datetime instance. Timezone-aware values are converted to UTC before formatting;
            naive values are formatted as they are.

        Returns
        -------
        str
            datetime in `'%Y-%m-%dT%H:%M:%S.%fZ'` format

        """
        assert isinstance(dt, datetime), "dt must be datetime type"
        # The format ends with a literal "Z" (UTC), so an aware datetime must be
        # shifted to UTC first; otherwise its local wall-clock time would be
        # mislabelled as UTC.
        offset = dt.utcoffset()
        if offset is not None:
            dt = (dt - offset).replace(tzinfo=None)
        return dt.strftime(DATETIME_FORMAT)

    @classmethod
    def value(cls, value, maximum=None):
        """Set a filter value in the Data Manager.

        Parameters
        ----------
        value: str | int | float | datetime | bool
            value to use for filtering. If the maximum parameter is passed, then this value field is the minimum.

        maximum: int | float | datetime
            Specify a maximum for a filtering range with IN, NOT_IN operators.

        Returns
        -------
        any
            `value` itself (datetimes are converted with `Filters.datetime()`), or
            `{"min": value, "max": maximum}` when `maximum` is given

        """
        if isinstance(value, datetime):
            value = cls.datetime(value)

        # Compare against None so that falsy bounds such as 0 still produce a range.
        if maximum is not None:
            if isinstance(maximum, datetime):
                maximum = cls.datetime(maximum)
            return {"min": value, "max": maximum}

        return value

常量

AND

使用AND组合筛选条件

OR

使用OR组合过滤器

静态方法

def create(conjunction, items)

为Project.get_tasks()创建过滤器

参数

conjunction : str
The conjunction operator between filters ('or' or 'and')
items : list
What to filter, use the Filters.item() method to build it

返回

dict
containing specified parameters
source code 浏览Git
@staticmethod
def create(conjunction, items):
    """Assemble a filter for `label_studio_sdk.project.Project.get_tasks()`

    Parameters
    ----------
    conjunction: str
        How the filter items are combined ('or' or 'and')
    items: list
        The filter items; build each one with the `Filters.item()` method

    Returns
    -------
    dict
        `{"conjunction": conjunction, "items": items}`

    """
    return dict(conjunction=conjunction, items=items)
def datetime(dt)

用于过滤数据管理器的日期时间字符串格式。

参数

dt
datetime instance

返回

str
datetime in '%Y-%m-%dT%H:%M:%S.%fZ' format
source code 浏览Git
@staticmethod
def datetime(dt):
    """Date time string format for filtering the Data Manager.

    Parameters
    ----------
    dt
        datetime instance. Timezone-aware values are converted to UTC before formatting;
        naive values are formatted as they are.

    Returns
    -------
    str
        datetime in `'%Y-%m-%dT%H:%M:%S.%fZ'` format

    """
    assert isinstance(dt, datetime), "dt must be datetime type"
    # The format ends with a literal "Z" (UTC), so an aware datetime must be
    # shifted to UTC first; otherwise its local wall-clock time would be
    # mislabelled as UTC.
    offset = dt.utcoffset()
    if offset is not None:
        dt = (dt - offset).replace(tzinfo=None)
    return dt.strftime(DATETIME_FORMAT)
def item(name, operator, column_type, value)

与其他类结合使用以指定筛选器的内容。

参数

name : Column or str
Column.id, Column.completed_at, Column.data('my_field'), etc
operator : Operator
Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc
column_type : Type
Type.Number, Type.Boolean, Type.String, etc
value : Filters.value()
Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 1, 1), datetime.now())

返回

dict
 
source code 浏览Git
@staticmethod
def item(name, operator, column_type, value):
    """Describe a single filter condition; combine it with the other helper classes.

    Parameters
    ----------
    name: `Column` or str
        Column.id, Column.completed_at, Column.data('my_field'), etc
    operator: `Operator`
        Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc
    column_type: `Type`
        Type.Number, Type.Boolean, Type.String, etc
    value: `Filters.value()`
        Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 1, 1), datetime.now())

    Returns
    -------
    dict
        Filter item with the keys `filter` (the column name prefixed with `'filter:'`),
        `operator`, `type` and `value`
    """
    filter_name = "filter:" + name
    return dict(
        filter=filter_name,
        operator=operator,
        type=column_type,
        value=value,
    )
def value(value, maximum=None)

在数据管理器中设置筛选值。

参数

value : str | int | float | datetime | boolean
value to use for filtering. If the maximum parameter is passed, then this value field is the minimum.
maximum : int | float | datetime
Specify a maximum for a filtering range with IN, NOT_IN operators.

返回

any
value for filtering
source code 浏览Git
@classmethod
def value(cls, value, maximum=None):
    """Prepare a filter value for the Data Manager.

    Parameters
    ----------
    value: str | int | float | datetime | bool
        Value to filter by. When `maximum` is passed, this is the minimum of the range.

    maximum: int | float | datetime
        Upper bound of a filtering range, for use with the IN and NOT_IN operators.

    Returns
    -------
    any
        `value` itself (datetimes are converted with `cls.datetime()`), or
        `{"min": value, "max": maximum}` when `maximum` is given

    """
    minimum = cls.datetime(value) if isinstance(value, datetime) else value

    # No upper bound: a plain single value, not a range.
    if maximum is None:
        return minimum

    upper = cls.datetime(maximum) if isinstance(maximum, datetime) else maximum
    return {"min": minimum, "max": upper}
class Operator

指定创建过滤器时要使用的运算符。

source code 浏览Git
class Operator:
    """Operator names to pass as the `operator` argument of `Filters.item()`."""

    # Equality and ordering comparisons
    EQUAL = "equal"
    NOT_EQUAL = "not_equal"
    LESS = "less"
    LESS_OR_EQUAL = "less_or_equal"
    GREATER = "greater"
    GREATER_OR_EQUAL = "greater_or_equal"

    # Range checks; used with the min/max form of `Filters.value()`
    IN = "in"
    NOT_IN = "not_in"

    # Presumably membership in an explicit list of values - confirm against the Data Manager API
    IN_LIST = "in_list"
    NOT_IN_LIST = "not_in_list"

    # Text matching
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"
    REGEX = "regex"

    # Emptiness check
    EMPTY = "empty"

常量

CONTAINS
EMPTY
EQUAL
GREATER
GREATER_OR_EQUAL
IN
IN_LIST
LESS
LESS_OR_EQUAL
NOT_CONTAINS
NOT_EQUAL
NOT_IN
NOT_IN_LIST
REGEX
class Type

指定列中数据的类型。

source code 浏览Git
class Type:
    """Column data types to pass as the `column_type` argument of `Filters.item()`."""

    String = "String"
    Number = "Number"
    Boolean = "Boolean"
    Datetime = "Datetime"
    List = "List"

    Unknown = "Unknown"
    """Unknown is explicitly converted to string format."""

常量

Boolean
Datetime
List
Number
String
Unknown

未知内容被显式转换为字符串格式。