基类: BaseNodePostprocessor
PII节点处理器。
注意:这是一个测试版功能,API可能会变更。
参数:

| 名称 | 类型 | 描述 | 默认值 |
| --- | --- | --- | --- |
| llm | LLM | 用于预测的本地LLM。 | required |
| pii_str_tmpl | str | 用于PII掩码的提示词模板字符串(包含 {context_str} 和 {query_str} 占位符)。 | `'The current context information is provided. \nA task is also provided to mask the PII within the context. \nReturn the text, with all PII masked out, and a mapping of the original PII to the masked PII. \nReturn the output of the task in JSON. \nContext:\nHello Zhang Wei, I am John. Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0008 has a minimum payment of $24.53 that is due by July 31st. Based on your autopay settings, we will withdraw your payment. Task: Mask out the PII, replace each PII with a tag, and return the text. Return the mapping in JSON. \nOutput: \nHello [NAME1], I am [NAME2]. Your AnyCompany Financial Services, LLC credit card account [CREDIT_CARD_NUMBER] has a minimum payment of $24.53 that is due by [DATE_TIME]. Based on your autopay settings, we will withdraw your payment. Output Mapping:\n{{"NAME1": "Zhang Wei", "NAME2": "John", "CREDIT_CARD_NUMBER": "1111-0000-1111-0008", "DATE_TIME": "July 31st"}}\nContext:\n{context_str}\nTask: {query_str}\nOutput: \n'` |
| pii_node_info_key | str | 在节点元数据中保存PII映射信息所使用的键。 | `'__pii_node_info__'` |
Source code in llama-index-core/llama_index/core/postprocessor/pii.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
class PIINodePostprocessor(BaseNodePostprocessor):
    """
    PII Node processor.

    Sends each node's content to an LLM with a masking prompt, replaces the
    node content with the masked text, and stores the mapping parsed from the
    LLM response in the node's metadata.

    NOTE: this is a beta feature, the API might change.

    Args:
        llm (LLM): The local LLM to use for prediction.
        pii_str_tmpl (str): Prompt template string; it is formatted with
            ``context_str`` and ``query_str``.
        pii_node_info_key (str): Metadata key under which the parsed PII
            mapping is stored on each processed node.

    """

    llm: LLM
    pii_str_tmpl: str = DEFAULT_PII_TMPL
    pii_node_info_key: str = "__pii_node_info__"

    @classmethod
    def class_name(cls) -> str:
        """Return the name identifying this postprocessor class."""
        return "PIINodePostprocessor"

    def mask_pii(self, text: str) -> Tuple[str, Dict]:
        """
        Mask PII in text.

        Args:
            text (str): The text to send to the LLM for masking.

        Returns:
            Tuple[str, Dict]: The stripped part of the LLM response that
            precedes the "Output Mapping:" marker, and the JSON value parsed
            from the part that follows it.

        Raises:
            ValueError: If the LLM response does not contain the
                "Output Mapping:" marker, or if the text after the marker is
                not valid JSON (``json.JSONDecodeError`` is a ``ValueError``
                subclass).

        """
        pii_prompt = PromptTemplate(self.pii_str_tmpl)
        # TODO: allow customization
        task_str = (
            "Mask out the PII, replace each PII with a tag, and return the text. "
            "Return the mapping in JSON."
        )

        response = self.llm.predict(pii_prompt, context_str=text, query_str=task_str)

        marker = "Output Mapping:"
        splits = response.split(marker)
        if len(splits) < 2:
            # Without the marker there is no mapping section to parse. Fail
            # with an explicit error instead of an opaque IndexError, and
            # keep the response out of the message because it may still
            # contain unmasked PII.
            raise ValueError(
                f"LLM response does not contain the expected {marker!r} "
                "section; cannot extract the PII mapping."
            )

        text_output = splits[0].strip()
        json_str_output = splits[1].strip()
        json_dict = json.loads(json_str_output)
        return text_output, json_dict

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """
        Postprocess nodes.

        Args:
            nodes (List[NodeWithScore]): Scored nodes whose content is masked.
            query_bundle (Optional[QueryBundle]): Not used by this method.

        Returns:
            List[NodeWithScore]: New scored nodes wrapping deep copies of the
            input nodes, with masked content and the original scores. The
            input nodes themselves are not modified.

        """
        # swap out text from nodes, with the original node mappings
        new_nodes = []
        for node_with_score in nodes:
            node = node_with_score.node
            new_text, mapping_info = self.mask_pii(
                node.get_content(metadata_mode=MetadataMode.LLM)
            )
            # Work on a copy so the caller's node keeps its original text.
            new_node = deepcopy(node)
            # Register the mapping key in both excluded-metadata lists
            # (presumably so the raw PII mapping is not rendered into
            # embedding or LLM text -- confirm against the node schema).
            new_node.excluded_embed_metadata_keys.append(self.pii_node_info_key)
            new_node.excluded_llm_metadata_keys.append(self.pii_node_info_key)
            new_node.metadata[self.pii_node_info_key] = mapping_info
            new_node.set_content(new_text)
            new_nodes.append(NodeWithScore(node=new_node, score=node_with_score.score))
        return new_nodes
|
mask_pii
mask_pii(text: str) -> Tuple[str, Dict]
对文本中的个人身份信息(PII)进行掩码处理,返回掩码后的文本以及从LLM输出中解析得到的PII映射。
Source code in llama-index-core/llama_index/core/postprocessor/pii.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def mask_pii(self, text: str) -> Tuple[str, Dict]:
    """
    Mask PII in text.

    Args:
        text (str): The text to send to the LLM for masking.

    Returns:
        Tuple[str, Dict]: The stripped part of the LLM response that
        precedes the "Output Mapping:" marker, and the JSON value parsed
        from the part that follows it.

    Raises:
        ValueError: If the LLM response does not contain the
            "Output Mapping:" marker, or if the text after the marker is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``
            subclass).

    """
    pii_prompt = PromptTemplate(self.pii_str_tmpl)
    # TODO: allow customization
    task_str = (
        "Mask out the PII, replace each PII with a tag, and return the text. "
        "Return the mapping in JSON."
    )

    response = self.llm.predict(pii_prompt, context_str=text, query_str=task_str)

    marker = "Output Mapping:"
    splits = response.split(marker)
    if len(splits) < 2:
        # Without the marker there is no mapping section to parse. Fail with
        # an explicit error instead of an opaque IndexError, and keep the
        # response out of the message because it may still contain unmasked
        # PII.
        raise ValueError(
            f"LLM response does not contain the expected {marker!r} "
            "section; cannot extract the PII mapping."
        )

    text_output = splits[0].strip()
    json_str_output = splits[1].strip()
    json_dict = json.loads(json_str_output)
    return text_output, json_dict
|