{"document_version":"v8.0","current_judge_prompt_hash":"ff59a2d86860e0c738d3b6aeba4565000213c9ac63c7a00bc057e44cc27658c8","rubric":"StrongREJECT (Souly et al., NeurIPS 2024, arXiv:2402.10260)","harness_repo_path":"backend/benchmarks/","groundtruth_repo_path":"backend/benchmarks/judge_groundtruth/","generated_at":"2026-06-10T14:28:58.536965+00:00","published":false,"status":"pending_calibration","note":"The benchmark harness and ground-truth set are in the repo; the public numbers will be published after a reviewed real-jury run."}