.. |
eval_academic_leaderboard_202407.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_academic_leaderboard_202412.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_academic_leaderboard_202502.py
|
[Update] Academic bench llm judge update (#1876)
|
2025-02-24 15:45:24 +08:00 |
eval_alaya.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_api_demo.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_attack.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_babilong.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_base_demo.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_bluelm_32k_lveval.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_cascade_evaluator.py
|
[Update] Add CascadeEvaluator with Data Replica (#2022)
|
2025-05-20 16:46:55 +08:00 |
eval_charm_mem.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_charm_rea.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_chat_agent_baseline.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_chat_agent.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_chat_demo.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_chat_last.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_chembench.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_chinese_simpleqa.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_cibench_api.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_cibench.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_circular.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_claude.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_code_passk_repeat_dataset.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_code_passk.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_codeagent.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_codebench_full.py
|
[Update] History code bench pass@k update (#2102)
|
2025-05-19 17:03:33 +08:00 |
eval_codegeex2.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_compassarena_subjectivebench_bradleyterry.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_compassarena_subjectivebench.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_contamination.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_corebench_2409_base_objective.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_corebench_2409_chat_objective.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_corebench_2409_longcontext.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_corebench_2409_subjective.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_deepseek_r1.py
|
[Update] Support OlympiadBench-Math/OmniMath/LiveMathBench-Hard (#1899)
|
2025-03-03 18:56:11 +08:00 |
eval_dingo.py
|
[Dataset] Update dingo 1.5.0 (#2008)
|
2025-04-07 17:21:15 +08:00 |
eval_ds1000_interpreter.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_edgellm_demo.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_gpt3.5.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_gpt4.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_hellobench.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_hf_llama2.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_hf_llama_7b.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_inference_ppl.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_internlm2_chat_keyset.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_internlm2_keyset.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_internlm3_math500_thinking.py
|
[Demo] Internlm3 math500 thinking demo (#1846)
|
2025-01-24 14:56:41 +08:00 |
eval_internlm_7b.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_internlm_chat_lmdeploy_apiserver.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_internlm_chat_turbomind.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_internlm_flames_chat.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_internlm_lmdeploy_apiserver.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_internlm_math_chat.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_internlm_turbomind.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_internLM.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_judge_dataset_all.py
|
[Add] add a config to Judge dataset all (#2077)
|
2025-05-07 10:57:23 +08:00 |
eval_judgebench.py
|
add Judgebench (#2066)
|
2025-04-30 15:01:10 +08:00 |
eval_judgerbench.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_judgerbenchv2.py
|
[Add] Add Judgerbenchv2 (#2067)
|
2025-04-30 17:12:34 +08:00 |
eval_korbench.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_lightllm.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_livestembench.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_llama2_7b_lveval.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_llama2_7b.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_llama3_instruct.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_llm_compression.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_llm_judge.py
|
[Feature] Add general math, llm judge evaluator (#1892)
|
2025-02-26 15:08:50 +08:00 |
eval_lmdeploy_demo.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_longbenchv2.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_math_llm_judge_internal.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_math_llm_judge.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_math_verify.py
|
[Feature] Math Verify with model post_processor (#1881)
|
2025-02-20 19:32:12 +08:00 |
eval_mathbench.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_mmlu_cf.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_mmlu_pro.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_mmlu_with_zero_retriever_overwritten.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_modelscope_datasets.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_multi_prompt_demo.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_musr.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_needlebench.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_OlympiadBench.py
|
[Feature] Support OlympiadBench Benchmark (#1841)
|
2025-01-24 10:00:01 +08:00 |
eval_PMMEval.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_qwen3.py
|
[Update] Add CascadeEvaluator with Data Replica (#2022)
|
2025-05-20 16:46:55 +08:00 |
eval_qwen_7b_chat_lawbench.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_qwen_7b_chat.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_qwen_7b.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_rewardbench.py
|
[Add] add rewardbench (#2029)
|
2025-04-21 17:18:51 +08:00 |
eval_rmb.py
|
add RMB Bench (#2056)
|
2025-04-27 16:26:01 +08:00 |
eval_ruler_fix_tokenizer.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_ruler.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_rwkv5_3b.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_simpleqa.py
|
[Update] Support AIME-24 Evaluation for DeepSeek-R1 series (#1888)
|
2025-02-25 20:34:41 +08:00 |
eval_subjective_alpacaeval_official.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_subjective_bradleyterry.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_subjective.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_teval.py
|
[Dataset] Add SuperGPQA subfield configs (#2124)
|
2025-05-28 14:12:58 +08:00 |
eval_TheoremQA.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |
eval_with_model_dataset_combinations.py
|
[Refactor] Code refactoarization (#1831)
|
2025-01-20 19:17:38 +08:00 |