# 数据模型
eval_752 API 的请求/响应 schema。
# Provider

```json
{
  "id": "prov_abc123",
  "name": "OpenAI Primary",
  "surface": "api",
  "browser_target": null,
  "base_url": "https://api.openai.com/v1",
  "models": [
    {
      "model_id": "gpt-4o-mini",
      "display_name": "GPT-4o Mini"
    }
  ],
  "created_at": "2025-11-09T12:34:56Z",
  "updated_at": "2025-11-09T12:34:56Z"
}
```

由 Browser Harness 导入创建的 provider 只面向浏览器运行:

```json
{
  "id": "prov_browser_123",
  "name": "ChatGPT Web",
  "surface": "browser",
  "browser_target": {
    "preset": "chatgpt",
    "origin": "https://chatgpt.com",
    "display_name": "ChatGPT Web"
  }
}
```

# Dataset

```json
{
  "id": "ds_xyz789",
  "display_name": "MMLU Sample",
  "source": "huggingface",
  "source_path": "cais/mmlu",
  "split": "test[:100]",
  "item_count": 100,
  "created_at": "2025-11-09T12:34:56Z"
}
```

# Run

```json
{
  "id": "run_def456",
  "label": "MMLU GPT-4 Test",
  "providerId": "prov_abc123",
  "providerName": "OpenAI Primary",
  "datasetId": "ds_xyz789",
  "datasetName": "MMLU Sample",
  "modelName": "gpt-4o-mini",
  "modelAlias": "benchmark",
  "status": "completed",
  "triggeredBy": "browser_harness",
  "retryCount": 0,
  "startedAt": "2025-11-09T12:35:00Z",
  "finishedAt": "2025-11-09T12:40:00Z",
  "config": {
    "variation": {
      "enabled": false,
      "per_item": 0,
      "strategies": []
    },
    "judge": {
      "provider_id": "prov_judge",
      "provider_name": "Judge API",
      "model": "gpt-4o-mini",
      "prompt": "Return 0 or 1.",
      "source": "browser_harness"
    }
  }
}
```

# Browser Harness Pack

```json
{
  "dataset_id": "ds_xyz789",
  "dataset_name": "Browser Harness Dataset",
  "version_hash": "hash-browser",
  "dataset_token": "signed-token",
  "judge_required": true,
  "blocked": false,
  "issues": [],
  "items": [
    {
      "dataset_item_id": "item-1",
      "sequence": 1,
      "prompt_text": "What is the capital of France?",
      "scoring": {
        "eligible": true,
        "requires_judge": true
      }
    }
  ]
}
```

# Browser Harness Import Response

```json
{
  "run_id": "run-001",
  "provider_id": "prov_browser_123",
  "provider_name": "ChatGPT Web",
  "dataset_reused": true,
  "scoring_queued": true
}
```

# Active Run Snapshot

```json
{
  "run": {
    "id": "run_def456",
    "providerName": "OpenAI Primary",
    "datasetName": "MMLU Sample",
    "modelName": "gpt-4o-mini",
    "status": "running"
  },
  "progress": {
    "completed": 40,
    "total": 100,
    "correct": 31,
    "incorrect": 9,
    "pending": 60
  },
  "currentItem": {
    "sequence": 41,
    "state": "running",
    "question": "What is the correct answer?",
    "promptText": "What is the correct answer?",
    "choices": ["A", "B", "C", "D"],
    "assets": {
      "image": {
        "path": "asset-41.png"
      }
    }
  },
  "recentItems": []
}
```

# Run Item Group

```json
{
  "itemId": "item-41",
  "sequence": 41,
  "state": "completed",
  "sectionName": "Biology",
  "question": "What is the correct answer?",
  "promptText": "What is the correct answer?",
  "choices": ["A", "B", "C", "D"],
  "assets": null,
  "promptPayload": {
    "messages": []
  },
  "answerPayload": {
    "label": "B"
  },
  "primary": {
    "id": "run-item-primary",
    "response": "B",
    "score": 1.0,
    "latencyMs": 1822
  },
  "variations": []
}
```

完整 schema 请参考 OpenAPI spec。
