数据模型

本文档说明 eval_752 API 请求与响应中使用的数据结构(schema)。以下 JSON 均为示例数据。

Provider

{
  "id": "prov_abc123",
  "name": "OpenAI Primary",
  "surface": "api",
  "browser_target": null,
  "base_url": "https://api.openai.com/v1",
  "models": [
    {
      "model_id": "gpt-4o-mini",
      "display_name": "GPT-4o Mini"
    }
  ],
  "created_at": "2025-11-09T12:34:56Z",
  "updated_at": "2025-11-09T12:34:56Z"
}

由 Browser Harness 导入创建的 provider 仅用于浏览器运行(surface 为 "browser",并带有 browser_target 信息):

{
  "id": "prov_browser_123",
  "name": "ChatGPT Web",
  "surface": "browser",
  "browser_target": {
    "preset": "chatgpt",
    "origin": "https://chatgpt.com",
    "display_name": "ChatGPT Web"
  }
}

Dataset

{
  "id": "ds_xyz789",
  "display_name": "MMLU Sample",
  "source": "huggingface",
  "source_path": "cais/mmlu",
  "split": "test[:100]",
  "item_count": 100,
  "created_at": "2025-11-09T12:34:56Z"
}

Run

{
  "id": "run_def456",
  "label": "MMLU GPT-4 Test",
  "providerId": "prov_abc123",
  "providerName": "OpenAI Primary",
  "datasetId": "ds_xyz789",
  "datasetName": "MMLU Sample",
  "modelName": "gpt-4o-mini",
  "modelAlias": "benchmark",
  "status": "completed",
  "triggeredBy": "browser_harness",
  "retryCount": 0,
  "startedAt": "2025-11-09T12:35:00Z",
  "finishedAt": "2025-11-09T12:40:00Z",
  "config": {
    "variation": {
      "enabled": false,
      "per_item": 0,
      "strategies": []
    },
    "judge": {
      "provider_id": "prov_judge",
      "provider_name": "Judge API",
      "model": "gpt-4o-mini",
      "prompt": "Return 0 or 1.",
      "source": "browser_harness"
    }
  }
}

Browser Harness Pack

{
  "dataset_id": "ds_xyz789",
  "dataset_name": "Browser Harness Dataset",
  "version_hash": "hash-browser",
  "dataset_token": "signed-token",
  "judge_required": true,
  "blocked": false,
  "issues": [],
  "items": [
    {
      "dataset_item_id": "item-1",
      "sequence": 1,
      "prompt_text": "What is the capital of France?",
      "scoring": {
        "eligible": true,
        "requires_judge": true
      }
    }
  ]
}

Browser Harness Import Response

{
  "run_id": "run-001",
  "provider_id": "prov_browser_123",
  "provider_name": "ChatGPT Web",
  "dataset_reused": true,
  "scoring_queued": true
}

Active Run Snapshot

{
  "run": {
    "id": "run_def456",
    "providerName": "OpenAI Primary",
    "datasetName": "MMLU Sample",
    "modelName": "gpt-4o-mini",
    "status": "running"
  },
  "progress": {
    "completed": 40,
    "total": 100,
    "correct": 31,
    "incorrect": 9,
    "pending": 60
  },
  "currentItem": {
    "sequence": 41,
    "state": "running",
    "question": "What is the correct answer?",
    "promptText": "What is the correct answer?",
    "choices": ["A", "B", "C", "D"],
    "assets": {
      "image": {
        "path": "asset-41.png"
      }
    }
  },
  "recentItems": []
}

Run Item Group

{
  "itemId": "item-41",
  "sequence": 41,
  "state": "completed",
  "sectionName": "Biology",
  "question": "What is the correct answer?",
  "promptText": "What is the correct answer?",
  "choices": ["A", "B", "C", "D"],
  "assets": null,
  "promptPayload": {
    "messages": []
  },
  "answerPayload": {
    "label": "B"
  },
  "primary": {
    "id": "run-item-primary",
    "response": "B",
    "score": 1.0,
    "latencyMs": 1822
  },
  "variations": []
}

完整 schema 请参考 OpenAPI spec。