Label Studio SDK 实用工具模块
本节包含您可以使用SDK执行的各种实用操作。如需了解其他可能需要的操作,请参阅client、project或data manager模块。
source code 浏览git
""" .. include::../docs/utils.md
"""
import logging
from lxml import etree
from collections import defaultdict
logger = logging.getLogger(__name__)
# Tag names whose "alias"/"value" attribute is collected as a label of the
# nearest enclosing output (control) tag by parse_config.
_LABEL_TAGS = {"Label", "Choice"}
# Tag names that parse_config never treats as output (control) tags, even
# when they carry both "name" and "toName" attributes.
_NOT_CONTROL_TAGS = {
"Filter",
}
def parse_config(config_string):
    """Parse a Label Studio labeling configuration into a structured dict.

    Useful for formatting results for predicted annotations and determining
    the type(s) of ML models that might be relevant to the labeling project.

    Parameters
    ----------
    config_string: str
        Label configuration XML as a string. A falsy value (``None`` or an
        empty string) yields an empty dict without any XML parsing.

    Returns
    -------
    dict
        Structured config keyed by the ``name`` of every output (control)
        tag, with the form:
        ```json
        {
            "<ControlTag>.name": {
                "type": "ControlTag",
                "to_name": ["<ObjectTag1>.name", "<ObjectTag2>.name"],
                "conditionals": {"type": "tag", "name": "<whenTagName>"},
                "inputs": [
                    {"type": "ObjectTag1", "value": "<ObjectTag1>.value"},
                    {"type": "ObjectTag2", "value": "<ObjectTag2>.value"}
                ],
                "labels": ["Label1", "Label2", "Label3"],
                "labels_attrs": {"Label1": {"value": "Label1"}}
            }
        }
        ```
        * ``labels`` entries are taken from ``alias`` if it exists, else
          from ``value``.
        * ``labels_attrs`` maps each label to the full attribute dict of
          its ``<Label>``/``<Choice>`` tag.
        * ``conditionals`` is present only for tags with
          ``perRegion="true"`` and one of ``whenTagName``,
          ``whenLabelValue`` or ``whenChoiceValue``; its ``type`` is
          ``"tag"``, ``"label"`` or ``"choice"`` respectively.
    """
    if not config_string:
        return {}

    # Checked in this order; the first non-empty attribute wins.
    conditional_attrs = (
        ("whenTagName", "tag"),
        ("whenLabelValue", "label"),
        ("whenChoiceValue", "choice"),
    )

    def _is_input_tag(tag):
        # Input (object) tags carry both a name and a value.
        return bool(tag.attrib.get("name") and tag.attrib.get("value"))

    def _is_output_tag(tag):
        # Output (control) tags carry a name and a toName, unless the tag
        # type is explicitly excluded.
        return bool(
            tag.attrib.get("name")
            and tag.attrib.get("toName")
            and tag.tag not in _NOT_CONTROL_TAGS
        )

    def _get_parent_output_tag_name(tag, outputs):
        # Find parental <Choices> tag for nested tags like
        # <Choices><View><View><Choice>...
        parent = tag.getparent()
        while parent is not None:
            name = parent.attrib.get("name")
            if name in outputs:
                return name
            parent = parent.getparent()
        return None

    xml_tree = etree.fromstring(config_string)
    inputs, outputs, labels = {}, {}, defaultdict(dict)

    for tag in xml_tree.iter():
        if _is_output_tag(tag):
            tag_info = {
                "type": tag.tag,
                "to_name": tag.attrib["toName"].split(","),
            }
            # Grab conditionals if any
            if tag.attrib.get("perRegion") == "true":
                for attr_name, kind in conditional_attrs:
                    attr_value = tag.attrib.get(attr_name)
                    if attr_value:
                        tag_info["conditionals"] = {
                            "type": kind,
                            "name": attr_value,
                        }
                        break
            outputs[tag.attrib["name"]] = tag_info
        elif _is_input_tag(tag):
            inputs[tag.attrib["name"]] = {
                "type": tag.tag,
                "value": tag.attrib["value"].lstrip("$"),
            }

        if tag.tag not in _LABEL_TAGS:
            continue

        # Label-like tags are attached to the closest enclosing output tag
        # that has already been registered.
        parent_name = _get_parent_output_tag_name(tag, outputs)
        if parent_name is None:
            continue
        actual_value = tag.attrib.get("alias") or tag.attrib.get("value")
        if actual_value:
            labels[parent_name][actual_value] = dict(tag.attrib)
        else:
            logger.debug(
                'Inspecting tag %s... found no "value" or "alias" attributes.',
                etree.tostring(tag, encoding="unicode").strip()[:50],
            )

    for output_tag, tag_info in outputs.items():
        tag_info["inputs"] = []
        for input_tag_name in tag_info["to_name"]:
            if input_tag_name not in inputs:
                logger.warning(
                    "to_name=%s is specified for output tag name=%s, "
                    "but we can't find it among input tags",
                    input_tag_name,
                    output_tag,
                )
                continue
            tag_info["inputs"].append(inputs[input_tag_name])
        tag_info["labels"] = list(labels[output_tag])
        tag_info["labels_attrs"] = labels[output_tag]
    return outputs
def chunk(lst, n):
    """Lazily split ``lst`` into consecutive slices of ``n`` items.

    ``lst`` must support ``len()`` and slicing. Slices are produced in
    order, and the last one is shorter when ``len(lst)`` is not a multiple
    of ``n``.
    """
    total = len(lst)
    start_offsets = range(0, total, n)
    yield from (lst[start : start + n] for start in start_offsets)
函数
def chunk(lst, n)
-
从 lst 中依次生成长度为 n 的连续分块(最后一块可能不足 n 个元素)。
source code 浏览Git
def chunk(lst, n):
    """Lazily split ``lst`` into consecutive slices of ``n`` items.

    ``lst`` must support ``len()`` and slicing. Slices are produced in
    order, and the last one is shorter when ``len(lst)`` is not a multiple
    of ``n``.
    """
    total = len(lst)
    start_offsets = range(0, total, n)
    yield from (lst[start : start + n] for start in start_offsets)
def parse_config(config_string)
-
解析给定的Label Studio标注配置,并返回配置的结构化版本。 可用于格式化预测标注的结果,并确定与标注项目可能相关的机器学习模型类型。
参数
config_string
:str
- 字符串形式的标注配置 XML
返回
dict
- 结构化配置,形式如下:
{ "<ControlTag>.name": { "type": "ControlTag", "to_name": ["<ObjectTag1>.name", "<ObjectTag2>.name"], "inputs": [ {"type": "ObjectTag1", "value": "<ObjectTag1>.value"}, {"type": "ObjectTag2", "value": "<ObjectTag2>.value"} ], "labels": ["Label1", "Label2", "Label3"] // 若存在 "alias" 则取 "alias",否则取 "value" } }
source code 浏览git
def parse_config(config_string):
    """Parse a Label Studio labeling configuration into a structured dict.

    Useful for formatting results for predicted annotations and determining
    the type(s) of ML models that might be relevant to the labeling project.

    Parameters
    ----------
    config_string: str
        Label configuration XML as a string. A falsy value (``None`` or an
        empty string) yields an empty dict without any XML parsing.

    Returns
    -------
    dict
        Structured config keyed by the ``name`` of every output (control)
        tag, with the form:
        ```json
        {
            "<ControlTag>.name": {
                "type": "ControlTag",
                "to_name": ["<ObjectTag1>.name", "<ObjectTag2>.name"],
                "conditionals": {"type": "tag", "name": "<whenTagName>"},
                "inputs": [
                    {"type": "ObjectTag1", "value": "<ObjectTag1>.value"},
                    {"type": "ObjectTag2", "value": "<ObjectTag2>.value"}
                ],
                "labels": ["Label1", "Label2", "Label3"],
                "labels_attrs": {"Label1": {"value": "Label1"}}
            }
        }
        ```
        * ``labels`` entries are taken from ``alias`` if it exists, else
          from ``value``.
        * ``labels_attrs`` maps each label to the full attribute dict of
          its ``<Label>``/``<Choice>`` tag.
        * ``conditionals`` is present only for tags with
          ``perRegion="true"`` and one of ``whenTagName``,
          ``whenLabelValue`` or ``whenChoiceValue``; its ``type`` is
          ``"tag"``, ``"label"`` or ``"choice"`` respectively.
    """
    if not config_string:
        return {}

    # Checked in this order; the first non-empty attribute wins.
    conditional_attrs = (
        ("whenTagName", "tag"),
        ("whenLabelValue", "label"),
        ("whenChoiceValue", "choice"),
    )

    def _is_input_tag(tag):
        # Input (object) tags carry both a name and a value.
        return bool(tag.attrib.get("name") and tag.attrib.get("value"))

    def _is_output_tag(tag):
        # Output (control) tags carry a name and a toName, unless the tag
        # type is explicitly excluded.
        return bool(
            tag.attrib.get("name")
            and tag.attrib.get("toName")
            and tag.tag not in _NOT_CONTROL_TAGS
        )

    def _get_parent_output_tag_name(tag, outputs):
        # Find parental <Choices> tag for nested tags like
        # <Choices><View><View><Choice>...
        parent = tag.getparent()
        while parent is not None:
            name = parent.attrib.get("name")
            if name in outputs:
                return name
            parent = parent.getparent()
        return None

    xml_tree = etree.fromstring(config_string)
    inputs, outputs, labels = {}, {}, defaultdict(dict)

    for tag in xml_tree.iter():
        if _is_output_tag(tag):
            tag_info = {
                "type": tag.tag,
                "to_name": tag.attrib["toName"].split(","),
            }
            # Grab conditionals if any
            if tag.attrib.get("perRegion") == "true":
                for attr_name, kind in conditional_attrs:
                    attr_value = tag.attrib.get(attr_name)
                    if attr_value:
                        tag_info["conditionals"] = {
                            "type": kind,
                            "name": attr_value,
                        }
                        break
            outputs[tag.attrib["name"]] = tag_info
        elif _is_input_tag(tag):
            inputs[tag.attrib["name"]] = {
                "type": tag.tag,
                "value": tag.attrib["value"].lstrip("$"),
            }

        if tag.tag not in _LABEL_TAGS:
            continue

        # Label-like tags are attached to the closest enclosing output tag
        # that has already been registered.
        parent_name = _get_parent_output_tag_name(tag, outputs)
        if parent_name is None:
            continue
        actual_value = tag.attrib.get("alias") or tag.attrib.get("value")
        if actual_value:
            labels[parent_name][actual_value] = dict(tag.attrib)
        else:
            logger.debug(
                'Inspecting tag %s... found no "value" or "alias" attributes.',
                etree.tostring(tag, encoding="unicode").strip()[:50],
            )

    for output_tag, tag_info in outputs.items():
        tag_info["inputs"] = []
        for input_tag_name in tag_info["to_name"]:
            if input_tag_name not in inputs:
                logger.warning(
                    "to_name=%s is specified for output tag name=%s, "
                    "but we can't find it among input tags",
                    input_tag_name,
                    output_tag,
                )
                continue
            tag_info["inputs"].append(inputs[input_tag_name])
        tag_info["labels"] = list(labels[output_tag])
        tag_info["labels_attrs"] = labels[output_tag]
    return outputs