{ "total_labeled_cases": 988, "total_unlabeled_cases": -7, "1a) QA model answer rate in top 3": 0.856, "1b) QA model okay rate in top 3": 0.046, "1c) QA model bad rate in top 3": 0.098, "total labeled in top 3": 153, "1a) QA model answer rate in top 5": 0.787, "1b) QA model okay rate in top 5": 0.091, "1c) QA model bad rate in top 5": 0.122, "total labeled in top 5": 254, "2) QA model ndcg metric": 0.94, "3a) QA model bad answer placement error - median": 2.5, "3b) QA model bad answer placement error - 75th percentile": 3.757, "4a) QA model answer placement error - median": 1.967, "4b) QA model answer placement error - 75th percentile": 3.125 }