"expected": {"fuzzy_match": {"text": "The capital of France is Paris", "threshold": 0.85, "algorithm": "levenshtein"}}
import json


def run_pmlte(suite_path, query_fn=None):
    """Run a PMLTE test suite against a model and collect per-test results.

    Args:
        suite_path: Path to a JSON suite file with "template" and "tests" keys.
        query_fn: Callable (prompt, system=...) -> str used to query the model.
            Defaults to ``llm_client.query_model`` (lazily imported), so existing
            callers are unaffected.

    Returns:
        A list of dicts, one per test: {"id", "passed", "output"}.

    NOTE(review): ``evaluate`` is defined elsewhere in the document; this runner
    assumes it compares the model output against the test's "expected" spec.
    """
    if query_fn is None:
        # Lazy import keeps the module importable without llm_client installed.
        from llm_client import query_model as query_fn

    # Close the suite file deterministically instead of json.load(open(...)).
    with open(suite_path) as f:
        suite = json.load(f)

    results = []
    for test in suite["tests"]:
        prompt = suite["template"]["user"]
        for var, val in test["input_vars"].items():
            # Substitute the {var} placeholder. The original used
            # ``replace(f"var", val)``, which replaced the literal text "var"
            # — the braces were lost in extraction.
            prompt = prompt.replace(f"{{{var}}}", val)
        output = query_fn(prompt, system=suite["template"].get("system"))
        passed = evaluate(output, test["expected"])
        # Original appended bare key:value pairs (syntax error); wrap in a dict.
        results.append({"id": test["id"], "passed": passed, "output": output})
    return results


# 6.1 Multi-turn conversations
#
# "template": {
#   "conversation": [
#     {"role": "system", "content": "You are a helpful assistant."},
#     {"role": "user", "content": "{question1}"},
#     {"role": "assistant", "content": "{expected_answer1}"},
#     {"role": "user", "content": "{question2}"}
#   ]
# }
# "expected": {"fuzzy_match": {"text": "The capital of France is
# (fragment truncated in source)
A reference runner (Python pseudo-code) is shown above.