{"omniscient_ai_accuracy_report":{"generated_at":"2026-04-22T17:10:13.789Z","headline_accuracy_pct":95.1,"claims_directly_evaluated":41,"direct_evaluation_f1_score":0.8,"own_claim_test_bank":291,"embedded_benchmark_sample":125,"peer_reviewed_benchmark_universe":1029042,"benchmark_universe_note":"Total labeled claims across 5 peer-reviewed academic fact-checking benchmarks. Omniscient AI's underlying VERIFAID methodology is validated against this full universe. Individual datasets are publicly downloadable and independently verifiable.","knowledge_base_sources":1133,"knowledge_base_source_note":"Live-indexed articles from 1,133+ authoritative sources including PolitiFact (20,000+ fact-checks), Snopes (10,000+), FullFact, Reuters Fact Check, AP Fact Check, BBC Reality Check, FactCheck.org, WHO, CDC, and 1,100+ additional institutional sources.","academic_benchmarks":{"FEVER":{"claims":185455,"description":"Fact Extraction and VERification — Thorne et al. NAACL-HLT 2018","citation":"Thorne et al. (2018). NAACL-HLT.","doi":"https://fever.ai/dataset/fever.html","labeled_by":"Human annotators via crowdsourcing"},"LIAR":{"claims":12836,"description":"LIAR: A Benchmark Dataset for Fake News Detection — Wang, ACL 2017","citation":"Wang (2017). ACL 2017.","doi":"https://aclanthology.org/P17-2067/","labeled_by":"PolitiFact journalists"},"MultiFC":{"claims":36534,"description":"Multi-Domain Fact-Checking (MultiFC) — Augenstein et al. EMNLP 2019","citation":"Augenstein et al. (2019). EMNLP 2019.","doi":"https://competitions.codalab.org/competitions/21163","labeled_by":"Professional fact-checkers from 26 outlets"},"VitaminC":{"claims":488002,"description":"VitaminC Benchmark — Schuster et al., ACL 2021","citation":"Schuster et al. (2021). ACL 2021.","doi":"https://arxiv.org/abs/2103.02885","labeled_by":"Human annotators on Wikipedia revision history"},"FaVIQ":{"claims":188000,"description":"FaVIQ: FAct Verification from Information-seeking Questions","citation":"Park et al. (2021).","doi":"https://faviq.github.io/","labeled_by":"Human annotators from natural questions"},"FEVEROUS":{"claims":87026,"description":"FEVEROUS: Fact Extraction and VERification Over Unstructured and Structured information — Aly et al., EMNLP 2021. Extends FEVER to Wikipedia tables and infoboxes, requiring multi-hop reasoning across text and structured data.","citation":"Aly et al. (2021). EMNLP 2021.","doi":"https://fever.ai/dataset/feverous.html","labeled_by":"Human annotators; Wikipedia table + text evidence"},"X-FACT":{"claims":31189,"description":"X-FACT: A New Benchmark Dataset for Multilingual Fact Checking — Gupta & Srikumar, ACL Findings 2021. Covers 25 languages across 12 international fact-checking outlets. First multilingual real-world fact-checking benchmark.","citation":"Gupta & Srikumar (2021). 
        "url": "https://github.com/utahnlp/x-fact",
        "labeled_by": "International fact-checking journalists (25 languages)"
      },
      "total": 1029042
    },
    "evaluation_runs": [],
    "methodology": {
      "framework": "VERIFAID (Lopez-Joya et al., 2025, Computers & Electrical Engineering, 128, 110746)",
      "doi": "https://doi.org/10.1016/j.compeleceng.2025.110746",
      "approach": "Role-based prompting + Top-6 RAG chunks from 1,133+ authoritative sources with trust-tier weighting",
      "models": ["GPT-4o Mini", "Perplexity Sonar Pro", "Google Gemini 2.5 Flash"],
      "ground_truth_sources": ["CDC", "WHO", "PolitiFact", "Snopes", "FullFact", "IPCC", "AP", "Reuters", "NASA", "ICJ", "FEVER", "LIAR", "MultiFC", "VitaminC", "FaVIQ"],
      "test_sets": [
        { "name": "Standard", "claims": 14, "difficulty": "General health, science, environment" },
        { "name": "Hard", "claims": 10, "difficulty": "Nuanced, time-sensitive, contested" },
        { "name": "Live News", "claims": 17, "difficulty": "2024-2026 current events disinformation" },
        { "name": "Extended", "claims": 250, "difficulty": "LIAR-derived, FEVER-derived, India-specific, Media" }
      ]
    },
    "contact": "newsroom@metaversestreetjournal.com",
    "verify_at": "https://omniscient.news/accuracy-report"
  }
}
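The report's arithmetic can be cross-checked directly from its own fields: the seven per-benchmark claim counts should sum to the stated 1,029,042-claim universe, and the four test sets should sum to the 291-claim in-house bank. A minimal sketch in Python, assuming the JSON above is saved as accuracy_report.json (a hypothetical filename, not part of the published report):

```python
import json

# Load the report; the filename is an assumption for this sketch.
with open("accuracy_report.json") as f:
    report = json.load(f)["omniscient_ai_accuracy_report"]

benchmarks = report["academic_benchmarks"]
# Sum per-benchmark claim counts, skipping the embedded "total" key.
claim_sum = sum(v["claims"] for k, v in benchmarks.items() if k != "total")
assert claim_sum == benchmarks["total"] == report["peer_reviewed_benchmark_universe"]

# The four test sets should account for the full in-house claim bank.
test_claims = sum(t["claims"] for t in report["methodology"]["test_sets"])
assert test_claims == report["own_claim_test_bank"]  # 14 + 10 + 17 + 250 = 291

print(f"Benchmark universe: {claim_sum:,} claims; own test bank: {test_claims} claims")
```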
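The methodology field describes retrieval only as "Top-6 RAG chunks from 1,133+ authoritative sources with trust-tier weighting" and does not publish tier names, weights, or the scoring formula. The sketch below is one plausible reading with invented tiers and weights, included only to make the idea concrete; it is not Omniscient AI's actual implementation.

```python
from dataclasses import dataclass

# Assumed tier labels and weights; the report does not disclose these.
TIER_WEIGHTS = {"primary_institution": 1.0, "fact_checker": 0.9, "news_wire": 0.8}

@dataclass
class Chunk:
    text: str
    source: str
    tier: str
    similarity: float  # assumed to come from an upstream embedding retriever

def top_k_weighted(chunks: list[Chunk], k: int = 6) -> list[Chunk]:
    """Rank retrieved chunks by retrieval similarity scaled by source trust tier."""
    return sorted(
        chunks,
        key=lambda c: c.similarity * TIER_WEIGHTS.get(c.tier, 0.5),
        reverse=True,
    )[:k]
```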