{
  "schema_version": 2,
  "release_id": "site-2026-03-06-followup-r2-paired",
  "compiled_at": "2026-03-06T16:52:49+00:00",
  "selection_spec": {
    "job": {
      "name": "site_dashboard_followup_r2_paired_v1",
      "job_run_id": "2026-03-06-175249-72faa800b59e",
      "created_at": "2026-03-06T16:52:49+00:00",
      "selected_runs": 6
    },
    "selection": {
      "root_id": "c9b5fbf028610744",
      "require_completed": true,
      "exclude_partial_results": true,
      "require_capabilities": [
        "has_proof_term",
        "has_proof_term_pretty",
        "has_assembly_trace",
        "has_proof_term_metrics"
      ],
      "order_by": "created_at_desc",
      "dedupe_run_id": true,
      "max_runs": 24
    },
    "selection_meta": {},
    "selection_stats": {
      "raw_rows": 14,
      "resolved_rows": 14,
      "filtered_rows": 6,
      "selected_rows": 6,
      "require_completed": true,
      "exclude_partial_results": true,
      "require_capabilities": [
        "has_proof_term",
        "has_proof_term_pretty",
        "has_assembly_trace",
        "has_proof_term_metrics"
      ],
      "order_by": "created_at_desc",
      "dedupe_run_id": true,
      "max_runs": 24
    },
    "reference": {}
  },
  "lake_snapshot": {
    "fingerprint": {
      "exists": true,
      "path": "/Volumes/Addenda/dev/specter-labs/wonton-soup/artifacts/lake/lake.duckdb",
      "bytes": 65024000,
      "mtime_ns": 1772815969564585007,
      "sha256": "b56f7a01dd8ee15ffb8b699d990ddfa81a59dcd294e1d9c3a6e9a291cbca1950"
    },
    "db_path": "/Volumes/Addenda/dev/specter-labs/wonton-soup/artifacts/lake/lake.duckdb"
  },
  "notes": [
    "March 2026 paired follow-up cohort with full proof-term and assembly-trace coverage across providers."
  ],
  "runs": [
    {
      "id": "p2-paired__provider=deepseek__control=centralized",
      "label": "control=centralized | deepseek | artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0 | tiers 10,50,200,1000 | 10 thm | wild 10.0% | int 15.5%",
      "dashboard": "data/p2-paired__provider=deepseek__control=centralized/dashboard_v2.json",
      "meta": {
        "theorem_count": 10,
        "crashed_count": 0,
        "wild_type_solve_rate": 0.1,
        "intervention_solve_rate": 0.15517241379310345,
        "created_at": "2026-03-04T16:52:40",
        "mode": "research",
        "corpus": "artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0",
        "budget_label": "standard",
        "providers": [
          "deepseek"
        ],
        "provider_label": "deepseek+block_easy",
        "settings_summary": "tiers 10,50,200,1000 | wk 1 | mcts centralized",
        "status": "completed",
        "partial_results": false,
        "goal_id_scheme": "checkpoint",
        "capabilities": {
          "has_proof_term": true,
          "has_proof_term_pretty": true,
          "has_assembly_trace": true,
          "has_proof_term_metrics": true
        }
      }
    },
    {
      "id": "p2-paired__provider=deepseek__distributed-a8-i64",
      "label": "distributed-a8-i64 | deepseek | artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0 | tiers 10,50,200,1000 | 10 thm | wild 20.0% | int 25.0%",
      "dashboard": "data/p2-paired__provider=deepseek__distributed-a8-i64/dashboard_v2.json",
      "meta": {
        "theorem_count": 10,
        "crashed_count": 0,
        "wild_type_solve_rate": 0.2,
        "intervention_solve_rate": 0.25,
        "created_at": "2026-03-05T01:01:58",
        "mode": "research",
        "corpus": "artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0",
        "budget_label": "standard",
        "providers": [
          "deepseek"
        ],
        "provider_label": "deepseek+block_easy",
        "settings_summary": "tiers 10,50,200,1000 | wk 1 | mcts distributed",
        "status": "completed",
        "partial_results": false,
        "goal_id_scheme": "checkpoint",
        "capabilities": {
          "has_proof_term": true,
          "has_proof_term_pretty": true,
          "has_assembly_trace": true,
          "has_proof_term_metrics": true
        }
      }
    },
    {
      "id": "p2-paired__provider=heuristic__control=centralized",
      "label": "control=centralized | heuristic | artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0 | tiers 10,50,200,1000 | 10 thm | wild 30.0% | int 4.3%",
      "dashboard": "data/p2-paired__provider=heuristic__control=centralized/dashboard_v2.json",
      "meta": {
        "theorem_count": 10,
        "crashed_count": 0,
        "wild_type_solve_rate": 0.3,
        "intervention_solve_rate": 0.042735042735042736,
        "created_at": "2026-03-04T16:34:31",
        "mode": "research",
        "corpus": "artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0",
        "budget_label": "standard",
        "providers": [
          "heuristic"
        ],
        "provider_label": "heuristic+block_easy",
        "settings_summary": "tiers 10,50,200,1000 | wk 1 | mcts centralized",
        "status": "completed",
        "partial_results": false,
        "goal_id_scheme": "checkpoint",
        "capabilities": {
          "has_proof_term": true,
          "has_proof_term_pretty": true,
          "has_assembly_trace": true,
          "has_proof_term_metrics": true
        }
      }
    },
    {
      "id": "p2-paired__provider=heuristic__distributed-a8-i64",
      "label": "distributed-a8-i64 | heuristic | artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0 | tiers 10,50,200,1000 | 10 thm | wild 30.0% | int 4.3%",
      "dashboard": "data/p2-paired__provider=heuristic__distributed-a8-i64/dashboard_v2.json",
      "meta": {
        "theorem_count": 10,
        "crashed_count": 0,
        "wild_type_solve_rate": 0.3,
        "intervention_solve_rate": 0.04310344827586207,
        "created_at": "2026-03-04T16:43:36",
        "mode": "research",
        "corpus": "artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0",
        "budget_label": "standard",
        "providers": [
          "heuristic"
        ],
        "provider_label": "heuristic+block_easy",
        "settings_summary": "tiers 10,50,200,1000 | wk 1 | mcts distributed",
        "status": "completed",
        "partial_results": false,
        "goal_id_scheme": "checkpoint",
        "capabilities": {
          "has_proof_term": true,
          "has_proof_term_pretty": true,
          "has_assembly_trace": true,
          "has_proof_term_metrics": true
        }
      }
    },
    {
      "id": "p2-paired__provider=reprover__control=centralized",
      "label": "control=centralized | reprover | artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0 | tiers 10,50,200,1000 | 10 thm | wild 20.0% | int 13.3%",
      "dashboard": "data/p2-paired__provider=reprover__control=centralized/dashboard_v2.json",
      "meta": {
        "theorem_count": 10,
        "crashed_count": 0,
        "wild_type_solve_rate": 0.2,
        "intervention_solve_rate": 0.13333333333333333,
        "created_at": "2026-03-04T16:19:03",
        "mode": "research",
        "corpus": "artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0",
        "budget_label": "standard",
        "providers": [
          "reprover"
        ],
        "provider_label": "reprover+block_easy",
        "settings_summary": "tiers 10,50,200,1000 | wk 1 | mcts centralized",
        "status": "completed",
        "partial_results": false,
        "goal_id_scheme": "checkpoint",
        "capabilities": {
          "has_proof_term": true,
          "has_proof_term_pretty": true,
          "has_assembly_trace": true,
          "has_proof_term_metrics": true
        }
      }
    },
    {
      "id": "p2-paired__provider=reprover__distributed-a8-i64",
      "label": "distributed-a8-i64 | reprover | artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0 | tiers 10,50,200,1000 | 10 thm | wild 20.0% | int 13.0%",
      "dashboard": "data/p2-paired__provider=reprover__distributed-a8-i64/dashboard_v2.json",
      "meta": {
        "theorem_count": 10,
        "crashed_count": 0,
        "wild_type_solve_rate": 0.2,
        "intervention_solve_rate": 0.13043478260869565,
        "created_at": "2026-03-04T16:25:20",
        "mode": "research",
        "corpus": "artifact:mathlib4@973f0905e770bc0f6b5b49bef6c69d00cbc0e00d7cdcdd87071d61774f117ff2:derived/feasible@57da946a8182fdacd5b27ed357cf2601a762e2f76059815f1396f6d3aa385da0",
        "budget_label": "standard",
        "providers": [
          "reprover"
        ],
        "provider_label": "reprover+block_easy",
        "settings_summary": "tiers 10,50,200,1000 | wk 1 | mcts distributed",
        "status": "completed",
        "partial_results": false,
        "goal_id_scheme": "checkpoint",
        "capabilities": {
          "has_proof_term": true,
          "has_proof_term_pretty": true,
          "has_assembly_trace": true,
          "has_proof_term_metrics": true
        }
      }
    }
  ],
  "default_run": "p2-paired__provider=deepseek__control=centralized"
}
