def prepare_evaluation_data(
    df: pd.DataFrame,
    text_col: str = "full_sentiment",
    label_col: str = "label"
) -> list:
    """
    Prepare evaluation data items from a DataFrame.

    Args:
        df: Input pandas DataFrame.
        text_col: Column containing the input text.
        label_col: Column containing the ground truth label.

    Returns:
        List of dicts formatted for evaluation.
    """
    return [
        {"item": {"input": str(row[text_col]), "ground_truth": row[label_col]}}
        for _, row in df.iterrows()
    ]
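
# Minimal usage sketch (assumption: this tiny DataFrame is illustrative only
# and not part of the original dataset).
example_df = pd.DataFrame({
    "full_sentiment": ["The food arrived cold and an hour late.", "Great delivery, the order was perfect!"],
    "label": ["negative", "positive"],
})
example_items = prepare_evaluation_data(example_df)
# Each item looks like:
# {"item": {"input": "The food arrived cold and an hour late.", "ground_truth": "negative"}}
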
def create_eval_run(evaluation_data: list, eval_id: str) -> str:
    """
    Create and launch an evaluation run.

    Args:
        evaluation_data: List of evaluation items.
        eval_id: The evaluation object ID.

    Returns:
        The run ID as a string.
    """
    eval_config = {
        "type": "completions",
        "model": "gpt-4.1",
        # The prompt is a template; {{ item.input }} is filled in from each
        # item's "input" field at run time.
        "input_messages": {
            "type": "template",
            "template": [
                {
                    "type": "message",
                    "role": "user",
                    "content": {
                        "type": "input_text",
                        "text": (
                            "Classify the sentiment of this food delivery review: {{ item.input }}. "
                            "Categorize the request into one of \"positive\", \"negative\" or \"unclear\". "
                            "Respond with only one of those words."
                        )
                    }
                }
            ]
        },
        # Inline the prepared items as the run's data source.
        "source": {
            "type": "file_content",
            "content": evaluation_data
        }
    }

    run = client.evals.runs.create(
        eval_id=eval_id,
        data_source=eval_config
    )

    print("✅ Evaluation run created successfully")
    print(f"Run ID: {run.id}")
    return run.id
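
# Usage sketch for the two helpers above (assumption: `df` and `eval_id` were
# created earlier in the notebook; the variable names here are illustrative).
evaluation_data = prepare_evaluation_data(df)
run_id = create_eval_run(evaluation_data, eval_id)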