@@ -1,10 +1,18 @@
 import torch
+from tabulate import tabulate
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
-
-from lm_eval.models.huggingface import HFLM
-from lm_eval.evaluator import evaluate
-from lm_eval.tasks import get_task_dict
+try:
+    from lm_eval.models.huggingface import HFLM
+    from lm_eval.evaluator import evaluate
+    from lm_eval.tasks import get_task_dict
+except ImportError as e:
+    print("""
+Error: The 'lm_eval' module was not found.
+To install, follow these steps:
+pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git
+""")
+    raise  # Re-raise the ImportError
 
 from torchao.quantization.quant_api import (
     change_linear_weights_to_int4_woqtensors,
@@ -16,6 +24,21 @@
 torch._inductor.config.force_fuse_int_mm_with_mul = True
 torch._inductor.config.fx_graph_cache = True
 
+def pretty_print_nested_results(results, precision: int = 6):
+    def format_value(value):
+        if isinstance(value, float):
+            return f"{value:.{precision}f}"
+        return value
+
+    main_table = []
+    for task, metrics in results["results"].items():
+        subtable = [[k, format_value(v)] for k, v in metrics.items() if k != 'alias']
+        subtable.sort(key=lambda x: x[0])  # Sort metrics alphabetically
+        formatted_subtable = tabulate(subtable, tablefmt='grid')
+        main_table.append([task, formatted_subtable])
+
+    print(tabulate(main_table, headers=['Task', 'Metrics'], tablefmt='grid'))
+
 def run_evaluation(repo_id, tasks, limit, device, precision, quantization, compile, batch_size, max_length):
 
     tokenizer = AutoTokenizer.from_pretrained(repo_id)
@@ -33,7 +56,6 @@ def run_evaluation(repo_id, tasks, limit, device, precision, quantization, compi
         change_linear_weights_to_int4_woqtensors(model.to(device=device))
     elif quantization == "autoquant":
        model = autoquant(model.to(device=device))
-
     with torch.no_grad():
         result = evaluate(
             HFLM(
@@ -44,8 +66,8 @@ def run_evaluation(repo_id, tasks, limit, device, precision, quantization, compi
             get_task_dict(tasks),
             limit=limit,
         )
-    for task, res in result["results"].items():
-        print(f"{task}: {res}")
+
+    pretty_print_nested_results(result)
 
 
 if __name__ == '__main__':
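
For reference, a minimal sketch of the nesting trick the new pretty_print_nested_results helper relies on: tabulate renders a pre-formatted grid table as a single multi-line cell of an outer grid table. The task name and metric values below are made-up placeholders for illustration, not output from this script.

from tabulate import tabulate

# Inner table: metric name/value pairs for one hypothetical task (placeholder numbers).
inner = tabulate(
    [["acc,none", "0.731200"], ["acc_stderr,none", "0.004400"]],
    tablefmt="grid",
)

# Outer table: one row per task, with the inner grid embedded as a multi-line cell.
outer = tabulate([["hellaswag", inner]], headers=["Task", "Metrics"], tablefmt="grid")
print(outer)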