Label Studio SDK 的数据管理模块
类可用于在Project.get_tasks()中筛选、排序和选择项目,并为数据管理器中任务可用的所有列名提供枚举,以及其他辅助功能。
如需执行其他操作,请参阅client、project或utils模块。
示例:
from datetime import datetime
from label_studio_sdk.data_manager import Filters, Column, Operator, Type
filters = Filters.create(Filters.OR, [
Filters.item(
Column.id,
Operator.GREATER,
Type.Number,
Filters.value(42)
),
Filters.item(
Column.completed_at,
Operator.IN,
Type.Datetime,
Filters.value(
datetime(2021, 11, 1),
datetime.now()
)
)
])
tasks = project.get_tasks(filters=filters)
source code 浏览Git
""" # Data Manager module for the Label Studio SDK
Classes can be used to filter, order, and select items in `label_studio_sdk.project.Project.get_tasks`
and provides enumeration for all column names available in the Data Manager for tasks, and other helpers.
See the [client](client.html), [project](project.html) or [utils](utils.html) modules for other operations you
might want to perform.
Example:
```python
from datetime import datetime
from label_studio_sdk.data_manager import Filters, Column, Operator, Type
filters = Filters.create(Filters.OR, [
Filters.item(
Column.id,
Operator.GREATER,
Type.Number,
Filters.value(42)
),
Filters.item(
Column.completed_at,
Operator.IN,
Type.Datetime,
Filters.value(
datetime(2021, 11, 1),
datetime.now()
)
)
])
tasks = project.get_tasks(filters=filters)
```
"""
from datetime import datetime
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"


class Filters:
    """Helpers for building task filters for the Label Studio Data Manager.

    Every helper returns plain Python data (dicts, strings, numbers) so the
    result can be passed as the ``filters`` argument of
    `label_studio_sdk.project.Project.get_tasks()`.
    """

    OR = "or"
    """Combine filters with an OR"""
    AND = "and"
    """Combine filters with an AND"""

    @staticmethod
    def create(conjunction, items):
        """Create a filter for `label_studio_sdk.project.Project.get_tasks()`

        Parameters
        ----------
        conjunction: str
            The conjunction operator between filters ('or' or 'and'),
            i.e. `Filters.OR` or `Filters.AND`
        items: list
            What to filter, use the `Filters.item()` method to build each entry

        Returns
        -------
        dict
            ``{"conjunction": conjunction, "items": items}``
        """
        return {"conjunction": conjunction, "items": items}

    @staticmethod
    def item(name, operator, column_type, value):
        """Use in combination with other classes to specify the contents of a filter.

        Parameters
        ----------
        name: `Column` or str
            Column.id, Column.completed_at, Column.data('my_field'), etc
        operator: `Operator`
            Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc
        column_type: `Type`
            Type.Number, Type.Boolean, Type.String, etc
        value: `Filters.value()`
            Filters.value(42), Filters.value('test'),
            Filters.value(datetime(2021, 1, 1), datetime.now())

        Returns
        -------
        dict
            Filter item with the keys ``filter``, ``operator``, ``type`` and
            ``value``; ``filter`` is `name` prefixed with ``"filter:"``.
        """
        return {
            "filter": "filter:" + name,
            "operator": operator,
            "type": column_type,
            "value": value,
        }

    @staticmethod
    def datetime(dt):
        """Date time string format for filtering the Data Manager.

        Parameters
        ----------
        dt
            datetime instance. Timezone-aware values are converted to UTC
            before formatting; naive values are formatted unchanged.

        Returns
        -------
        str
            datetime in `'%Y-%m-%dT%H:%M:%S.%fZ'` format

        Raises
        ------
        AssertionError
            If `dt` is not a `datetime.datetime` instance.
        """
        # Function-scope import: the module only imports ``datetime`` itself.
        from datetime import timezone

        # Explicit raise instead of ``assert`` so the check also runs under
        # ``python -O``; the exception type and message are kept as before.
        if not isinstance(dt, datetime):
            raise AssertionError("dt must be datetime type")

        # The format ends with a literal "Z" (UTC designator), so an aware
        # datetime in another zone must be shifted to UTC first; otherwise
        # its local wall-clock time would be mislabelled as UTC.
        if dt.utcoffset() is not None:
            dt = dt.astimezone(timezone.utc)
        return dt.strftime(DATETIME_FORMAT)

    @classmethod
    def value(cls, value, maximum=None):
        """Set a filter value in the Data Manager.

        Parameters
        ----------
        value: str | int | float | datetime | boolean
            value to use for filtering. If the maximum parameter is passed,
            then this value field is the minimum.
        maximum: int | float | datetime
            Specify a maximum for a filtering range with IN, NOT_IN operators.

        Returns
        -------
        any
            `value` itself (datetimes are converted with `Filters.datetime()`),
            or ``{"min": value, "max": maximum}`` when `maximum` is given.
        """
        if isinstance(value, datetime):
            value = cls.datetime(value)

        # Only ``None`` means "no upper bound"; 0 is a valid maximum.
        if maximum is not None:
            if isinstance(maximum, datetime):
                maximum = cls.datetime(maximum)
            return {"min": value, "max": maximum}

        return value
class Operator:
    """Operator names accepted as the `operator` argument of `Filters.item()`."""

    # Equality
    EQUAL = "equal"
    NOT_EQUAL = "not_equal"

    # Ordering comparisons
    LESS = "less"
    GREATER = "greater"
    LESS_OR_EQUAL = "less_or_equal"
    GREATER_OR_EQUAL = "greater_or_equal"

    # Range operators; pair them with Filters.value(minimum, maximum)
    IN = "in"
    NOT_IN = "not_in"

    # List variants
    IN_LIST = "in_list"
    NOT_IN_LIST = "not_in_list"

    # Emptiness
    EMPTY = "empty"

    # Text matching
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"
    REGEX = "regex"
class Type:
    """Column data types accepted as the `column_type` argument of `Filters.item()`."""

    Number = "Number"
    Datetime = "Datetime"
    Boolean = "Boolean"
    String = "String"
    List = "List"

    Unknown = "Unknown"
    """ Unknown is explicitly converted to string format. """
class Column:
    """Names of the Data Manager columns (Label Studio UI) that a filter can target."""

    # --- Task identity and origin -------------------------------------
    id = "tasks:id"
    """Task ID"""

    inner_id = "tasks:inner_id"
    """Task Inner ID, it starts from 1 for all projects"""

    ground_truth = "tasks:ground_truth"
    """Ground truth status of the tasks"""

    # --- Annotations and predictions ----------------------------------
    annotations_results = "tasks:annotations_results"
    """Annotation results for the tasks"""

    reviewed = "tasks:reviewed"
    """Whether the tasks have been reviewed (Enterprise only)"""

    predictions_score = "tasks:predictions_score"
    """Prediction score for the task"""

    predictions_model_versions = "tasks:predictions_model_versions"
    """Model version used for the predictions"""

    predictions_results = "tasks:predictions_results"
    """Prediction results for the tasks"""

    file_upload = "tasks:file_upload"
    """Name of the file uploaded to create the tasks"""

    # --- Timestamps and people ----------------------------------------
    created_at = "tasks:created_at"
    """Time the task was created at"""

    updated_at = "tasks:updated_at"
    """Time the task was updated at (e.g. new annotation was created, review added, etc)"""

    annotators = "tasks:annotators"
    """Annotators that completed the task (Community). Can include assigned annotators (Enterprise only)"""

    # --- Counters ------------------------------------------------------
    total_predictions = "tasks:total_predictions"
    """Total number of predictions for the task"""

    cancelled_annotations = "tasks:cancelled_annotations"
    """Number of cancelled or skipped annotations for the task"""

    total_annotations = "tasks:total_annotations"
    """Total number of annotations on a task"""

    completed_at = "tasks:completed_at"
    """Time when a task was fully annotated"""

    # --- Review workflow -----------------------------------------------
    agreement = "tasks:agreement"
    """Agreement for annotation results for a specific task (Enterprise only)"""

    reviewers = "tasks:reviewers"
    """Reviewers that reviewed the task, or assigned reviewers (Enterprise only)"""

    reviews_rejected = "tasks:reviews_rejected"
    """Number of annotations rejected for a task in review (Enterprise only)"""

    reviews_accepted = "tasks:reviews_accepted"
    """Number of annotations accepted for a task in review (Enterprise only)"""

    # --- Comments ------------------------------------------------------
    comments = "tasks:comments"
    """Number of comments in a task"""

    unresolved_comment_count = "tasks:unresolved_comment_count"
    """Number of unresolved comments in a task"""

    @staticmethod
    def data(task_field):
        """Build the column name that addresses a field of the task data.

        Parameters
        ----------
        task_field: str
            Name of the task data field, e.g. ``'my_field'``

        Returns
        -------
        str
            `task_field` prefixed with ``"tasks:data."``
        """
        prefix = "tasks:data."
        return prefix + task_field
def _test():
    """Build a two-item OR filter and check it against the expected payload."""
    # First item: numeric comparison on the task ID.
    id_item = Filters.item(
        Column.id, Operator.GREATER, Type.Number, Filters.value(42)
    )

    # Second item: datetime range on the completion time.
    range_start = datetime(2021, 11, 1)
    range_end = datetime(2021, 11, 5)
    completed_item = Filters.item(
        Column.completed_at,
        Operator.IN,
        Type.Datetime,
        Filters.value(range_start, range_end),
    )

    built = Filters.create(Filters.OR, [id_item, completed_item])

    expected_id_item = {
        "filter": "filter:tasks:id",
        "operator": "greater",
        "type": "Number",
        "value": 42,
    }
    expected_completed_item = {
        "filter": "filter:tasks:completed_at",
        "operator": "in",
        "type": "Datetime",
        "value": {
            "min": "2021-11-01T00:00:00.000000Z",
            "max": "2021-11-05T00:00:00.000000Z",
        },
    }
    expected = {
        "conjunction": "or",
        "items": [expected_id_item, expected_completed_item],
    }

    assert built == expected
类
class Column-
指定Label Studio UI中数据管理器上用于筛选的列。
source code 浏览Git
class Column: """Specify the column on the Data Manager in Label Studio UI to use in the filter.""" id = "tasks:id" """Task ID""" inner_id = "tasks:inner_id" """Task Inner ID, it starts from 1 for all projects""" ground_truth = "tasks:ground_truth" """Ground truth status of the tasks""" annotations_results = "tasks:annotations_results" """Annotation results for the tasks""" reviewed = "tasks:reviewed" """Whether the tasks have been reviewed (Enterprise only)""" predictions_score = "tasks:predictions_score" """Prediction score for the task""" predictions_model_versions = "tasks:predictions_model_versions" """Model version used for the predictions""" predictions_results = "tasks:predictions_results" """Prediction results for the tasks""" file_upload = "tasks:file_upload" """Name of the file uploaded to create the tasks""" created_at = "tasks:created_at" """Time the task was created at""" updated_at = "tasks:updated_at" """Time the task was updated at (e.g. new annotation was created, review added, etc)""" annotators = "tasks:annotators" """Annotators that completed the task (Community). 
Can include assigned annotators (Enterprise only)""" total_predictions = "tasks:total_predictions" """Total number of predictions for the task""" cancelled_annotations = "tasks:cancelled_annotations" """Number of cancelled or skipped annotations for the task""" total_annotations = "tasks:total_annotations" """Total number of annotations on a task""" completed_at = "tasks:completed_at" """Time when a task was fully annotated""" agreement = "tasks:agreement" """Agreement for annotation results for a specific task (Enterprise only)""" reviewers = "tasks:reviewers" """Reviewers that reviewed the task, or assigned reviewers (Enterprise only)""" reviews_rejected = "tasks:reviews_rejected" """Number of annotations rejected for a task in review (Enterprise only)""" reviews_accepted = "tasks:reviews_accepted" """Number of annotations accepted for a task in review (Enterprise only)""" comments = "tasks:comments" """Number of comments in a task""" unresolved_comment_count = "tasks:unresolved_comment_count" """Number of unresolved comments in a task""" @staticmethod def data(task_field): """Create a filter name for the task data field Parameters ---------- task_field Returns ------- str Filter name for task data """ return "tasks:data." + task_field常量
agreement-
特定任务标注结果的一致性(仅限企业版)
annotations_results-
任务的标注结果
annotators-
已完成任务的标注人员(社区版)。可包含已分配的标注人员(仅限企业版)
cancelled_annotations-
任务中被取消或跳过的标注数量
comments-
任务中的评论数量
completed_at-
任务被完全标注的时间
created_at-
任务创建时间
file_upload-
上传用于创建任务的文件名称
ground_truth-
任务的基础真值状态
id-
任务ID
inner_id-
任务内部ID,所有项目都从1开始计数
predictions_model_versions-
用于预测的模型版本
predictions_results-
任务的预测结果
predictions_score-
任务的预测分数
reviewed-
任务是否已审核(仅限企业版)
reviewers-
已审核该任务的审核者,或已分配的审核者(仅限企业版)
reviews_accepted-
任务审核中接受的标注数量(仅限企业版)
reviews_rejected-
在审核中被拒绝的任务标注数量(仅限企业版)
total_annotations-
任务上的标注总数
total_predictions-
任务的总预测数量
unresolved_comment_count-
任务中未解决的评论数量
updated_at-
任务更新时间(例如创建了新标注、添加了评审等)
静态方法
def data(task_field)-
为任务数据字段创建筛选器名称
参数
task_field
返回
str- Filter name for task data
source code 浏览Git
@staticmethod def data(task_field): """Create a filter name for the task data field Parameters ---------- task_field Returns ------- str Filter name for task data """ return "tasks:data." + task_field
class Filters-
使用此类中的方法和变量为Label Studio数据管理器中的任务创建和组合过滤器。
source code 浏览Git
class Filters: """ Use the methods and variables in this class to create and combine filters for tasks on the Label Studio Data Manager. """ OR = "or" """Combine filters with an OR""" AND = "and" """Combine filters with an AND""" @staticmethod def create(conjunction, items): """Create a filter for `label_studio_sdk.project.Project.get_tasks()` Parameters ---------- conjunction: str The conjunction operator between filters ('or' or 'and') items: list What to filter, use `Filter.item()` method to build it Returns ------- dict containing specified parameters """ return {"conjunction": conjunction, "items": items} @staticmethod def item(name, operator, column_type, value): """Use in combination with other classes to specify the contents of a filter. Parameters ---------- name: `Column` or str Column.id, Column.completed_at, Column.data('my_field'), etc operator: `Operator` Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc column_type: `Type` Type.Number, Type.Boolean, Type.String, etc value: `Filters.value()` Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 01, 01), datetime.now()) Returns ------- dict """ return { "filter": "filter:" + name, "operator": operator, "type": column_type, "value": value, } @staticmethod def datetime(dt): """Date time string format for filtering the Data Manager. Parameters ---------- dt datetime instance Returns ------- str datetime in `'%Y-%m-%dT%H:%M:%S.%fZ'` format """ assert isinstance(dt, datetime), "dt must be datetime type" return dt.strftime(DATETIME_FORMAT) @classmethod def value(cls, value, maximum=None): """Set a filter value in the Data Manager. Parameters ---------- value: str | int | float | datetime | boolean value to use for filtering. If the maximum parameter is passed, then this value field is the minimum. maximum: int | float | datetime Specify a maximum for a filtering range with IN, NOT_IN operators. 
Returns ------- any value for filtering """ if isinstance(value, datetime): value = cls.datetime(value) if maximum is not None: if isinstance(maximum, datetime): maximum = cls.datetime(maximum) return {"min": value, "max": maximum} return value常量
AND-
使用AND组合筛选条件
OR-
使用OR组合过滤器
静态方法
def create(conjunction, items)-
为Project.get_tasks()创建过滤器
参数
conjunction:str- The conjunction operator between filters ('or' or 'and')
items:list- What to filter, use the Filters.item() method to build it
返回
dict- containing specified parameters
source code 浏览Git
@staticmethod def create(conjunction, items): """Create a filter for `label_studio_sdk.project.Project.get_tasks()` Parameters ---------- conjunction: str The conjunction operator between filters ('or' or 'and') items: list What to filter, use `Filter.item()` method to build it Returns ------- dict containing specified parameters """ return {"conjunction": conjunction, "items": items} def datetime(dt)-
用于过滤数据管理器的日期时间字符串格式。
参数
dt- datetime instance
返回
str- datetime in
'%Y-%m-%dT%H:%M:%S.%fZ'format
source code 浏览Git
@staticmethod def datetime(dt): """Date time string format for filtering the Data Manager. Parameters ---------- dt datetime instance Returns ------- str datetime in `'%Y-%m-%dT%H:%M:%S.%fZ'` format """ assert isinstance(dt, datetime), "dt must be datetime type" return dt.strftime(DATETIME_FORMAT) def item(name, operator, column_type, value)-
与其他类结合使用以指定筛选器的内容。
参数
name:Columnorstr- Column.id, Column.completed_at, Column.data('my_field'), etc
operator:Operator- Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc
column_type:Type- Type.Number, Type.Boolean, Type.String, etc
value:Filters.value()- Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 1, 1), datetime.now())
返回
dict
source code 浏览Git
@staticmethod def item(name, operator, column_type, value): """Use in combination with other classes to specify the contents of a filter. Parameters ---------- name: `Column` or str Column.id, Column.completed_at, Column.data('my_field'), etc operator: `Operator` Operator.EQUAL, Operator.GREATER_OR_EQUAL, Operator.IN, etc column_type: `Type` Type.Number, Type.Boolean, Type.String, etc value: `Filters.value()` Filters.value(42), Filters.value('test'), Filters.value(datetime(2021, 01, 01), datetime.now()) Returns ------- dict """ return { "filter": "filter:" + name, "operator": operator, "type": column_type, "value": value, } def value(value, maximum=None)-
在数据管理器中设置筛选值。
参数
value:str | int | float | datetime | boolean- value to use for filtering. If the maximum parameter is passed, then this value field is the minimum.
maximum:int | float | datetime- Specify a maximum for a filtering range with IN, NOT_IN operators.
返回
any- value for filtering
source code 浏览Git
@classmethod def value(cls, value, maximum=None): """Set a filter value in the Data Manager. Parameters ---------- value: str | int | float | datetime | boolean value to use for filtering. If the maximum parameter is passed, then this value field is the minimum. maximum: int | float | datetime Specify a maximum for a filtering range with IN, NOT_IN operators. Returns ------- any value for filtering """ if isinstance(value, datetime): value = cls.datetime(value) if maximum is not None: if isinstance(maximum, datetime): maximum = cls.datetime(maximum) return {"min": value, "max": maximum} return value
class Operator-
指定创建过滤器时要使用的运算符。
source code 浏览Git
class Operator: """Specify the operator to use when creating a filter.""" EQUAL = "equal" NOT_EQUAL = "not_equal" LESS = "less" GREATER = "greater" LESS_OR_EQUAL = "less_or_equal" GREATER_OR_EQUAL = "greater_or_equal" IN = "in" NOT_IN = "not_in" IN_LIST = "in_list" NOT_IN_LIST = "not_in_list" EMPTY = "empty" CONTAINS = "contains" NOT_CONTAINS = "not_contains" REGEX = "regex"常量
CONTAINS
EMPTY
EQUAL
GREATER
GREATER_OR_EQUAL
IN
IN_LIST
LESS
LESS_OR_EQUAL
NOT_CONTAINS
NOT_EQUAL
NOT_IN
NOT_IN_LIST
REGEX
class Type-
指定列中数据的类型。
source code 浏览Git
class Type: """Specify the type of data in a column.""" Number = "Number" Datetime = "Datetime" Boolean = "Boolean" String = "String" List = "List" Unknown = "Unknown" """ Unknown is explicitly converted to string format. """常量
Boolean
Datetime
List
Number
String
Unknown-
未知内容被显式转换为字符串格式。