
imatrix: add option to display importance score statistics for a given imatrix file #12718


Open

wants to merge 40 commits into base: master

Changes from 26 commits (40 commits total)

Commits
d8e902e
Add --show-statistics option
EAddario Apr 1, 2025
f46693b
Add --show-statistics logic
EAddario Apr 1, 2025
b3ac78b
Merge branch 'master' into imatrix
EAddario Apr 1, 2025
dc3373e
Add tensor name parsing
EAddario Apr 2, 2025
0589c3e
Tidy output format
EAddario Apr 2, 2025
e1fd1af
Fix typo in title
EAddario Apr 2, 2025
490a8fe
Merge branch 'master' into imatrix
EAddario Apr 7, 2025
62ac268
Improve tensor influence ranking
EAddario Apr 8, 2025
73d8ecb
Add better statistics
EAddario Apr 13, 2025
200d88c
Merge branch 'master' into imatrix
EAddario Apr 13, 2025
0b7f9c4
Change statistics' sort order
EAddario Apr 15, 2025
52e86e2
Merge branch 'master' into imatrix
EAddario Apr 15, 2025
91d48da
Merge branch 'master' into imatrix
EAddario Apr 19, 2025
755c1ef
Add Cosine Similarity
EAddario Apr 22, 2025
72a5ec1
Merge branch 'master' into imatrix
EAddario May 3, 2025
5cd20e4
Add header search path
EAddario May 3, 2025
1dbe6c3
Change header search path to private
EAddario May 3, 2025
bb47f0d
Merge branch 'master' into imatrix
EAddario May 11, 2025
a3ac66c
Merge branch 'master' into imatrix
EAddario May 25, 2025
3eb556e
Add weighted statistics per layer
EAddario May 25, 2025
0276d71
Merge branch 'master' into imatrix
EAddario Jun 3, 2025
1f8dc23
Merge branch 'master' into imatrix
EAddario Jun 13, 2025
8ecd5fa
Merge branch 'master' into imatrix
EAddario Jun 14, 2025
8302a8a
Merge branch 'master' into imatrix
EAddario Jun 15, 2025
bfc0dfc
Merge branch 'master' into imatrix
EAddario Jun 21, 2025
5cfc443
Update report title
EAddario Jun 21, 2025
280dfdd
Merge branch 'master' into imatrix
EAddario Jun 22, 2025
235442a
Refactor compute_statistics out of main
EAddario Jun 22, 2025
c823d16
Refactor compute_cossim out of load_imatrix
EAddario Jun 22, 2025
a5c4640
Refactor compute_statistics out of load_imatrix
EAddario Jun 22, 2025
655be19
Move imatrix statistics calculation into its own functions
EAddario Jun 22, 2025
23ecca8
Add checks and validations
EAddario Jun 22, 2025
a4166a8
Remove unnecessary include directory
EAddario Jun 22, 2025
ed4ba31
Merge branch 'master' into imatrix
EAddario Jun 23, 2025
19f8e15
Rename labels
EAddario Jun 24, 2025
f5fd2b7
Add m_stats getter and refactor compute_statistics out of load_imatrix
EAddario Jun 24, 2025
bc3bd57
Refactor variable names
EAddario Jun 24, 2025
c3ede42
Merge branch 'master' into imatrix
EAddario Jun 24, 2025
1389753
Merge branch 'master' into imatrix
EAddario Jun 29, 2025
fde3089
Minor cosmetic change
EAddario Jun 29, 2025
7 changes: 7 additions & 0 deletions common/arg.cpp
@@ -2647,6 +2647,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.i_chunk = value;
}
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
add_opt(common_arg(
{"--show-statistics"},
string_format("show imatrix statistics and then exit (default: %s)", params.show_statistics ? "true" : "false"),
[](common_params & params) {
params.show_statistics = true;
}
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
add_opt(common_arg(
{"--parse-special"},
string_format("prase special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
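With the new flag, statistics are computed from an existing imatrix file and the program exits afterwards; per the checks added in imatrix.cpp below, exactly one --in-file argument is required. A minimal example invocation (the file name is a placeholder):

    llama-imatrix --in-file imatrix.dat --show-statistics
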
7 changes: 4 additions & 3 deletions common/common.h
@@ -416,9 +416,10 @@ struct common_params {
int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations
int32_t i_chunk = 0; // start processing from this chunk

bool process_output = false; // collect data for the output tensor
bool compute_ppl = true; // whether to compute perplexity
bool parse_special = false; // whether to parse special tokens during imatrix tokenization
bool process_output = false; // collect data for the output tensor
bool compute_ppl = true; // whether to compute perplexity
bool show_statistics = false; // show imatrix statistics per tensor
bool parse_special = false; // whether to parse special tokens during imatrix tokenization

// cvector-generator params
int n_pca_batch = 100;
1 change: 1 addition & 0 deletions tools/imatrix/CMakeLists.txt
@@ -2,4 +2,5 @@ set(TARGET llama-imatrix)
add_executable(${TARGET} imatrix.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_include_directories(${TARGET} PRIVATE ../../src)
target_compile_features(${TARGET} PRIVATE cxx_std_17)
226 changes: 218 additions & 8 deletions tools/imatrix/imatrix.cpp
@@ -13,18 +13,20 @@
#include <vector>
#include <fstream>
#include <unordered_map>
#include <map>
#include <algorithm>
#include <regex>
#include <numeric>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

static void print_usage(int, char ** argv) {
LOG("\nexample usage:\n");
LOG("\n %s \\\n"
" -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n"
" [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
" [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...] \\\n"
LOG("\n %s -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output]\n"
" [--chunk 123] [--output-frequency 10] [--save-frequency 0] [--show-statistics]\n"
" [--no-ppl] [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n"
" [--parse-special]\n" , argv[0]);
LOG("\n");
}
@@ -35,13 +37,28 @@ struct Stats {
int ncall = 0;
};

struct tensor_statistics {
std::string tensor;
Stats stats;
float total_bias = 0;
float mean_bias = 0;
float max_bias = 0;
float min_bias = 0;
int elements = 0;
float stddev = 0;
float active = 0;
float entropy = 0;
float zd = 0;
float cossim = 0;
};

class IMatrixCollector {
public:
IMatrixCollector() = default;
void set_params(common_params params) { m_params = std::move(params); }
bool collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data);
void save_imatrix(int ncall = -1) const;
bool load_imatrix(const char * fname);
bool load_imatrix(const char * fname, std::vector<tensor_statistics> * tstats = nullptr);
private:
std::unordered_map<std::string, Stats> m_stats;
common_params m_params;
@@ -70,6 +87,35 @@ static std::string filter_tensor_name(const char * name) {
return wname;
}

static void process_tensor_name(const std::string & input, std::string & layer, std::string & tensor) {
std::vector<std::string> name;
std::istringstream stream(input);
std::string item;

while (std::getline(stream, item, '.')) {
name.push_back(item);
}
for (size_t i = 0; i < name.size(); ++i) {
if (name[i] == "blk" && i + 1 < name.size()) {
layer = name[i + 1];
break;
}
}
for (size_t i = 0; i < name.size(); ++i) {
if (name[i] == "weight" && i > 0) {
tensor = name[i - 1];
break;
}
}

if (tensor.empty()) {
tensor = input;
}
if (layer.empty()) {
layer = "-";
}
}

bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
GGML_UNUSED(user_data);

@@ -292,7 +338,7 @@ void IMatrixCollector::save_imatrix(int ncall) const {
LOG_DBGV(1, "%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str());
}

bool IMatrixCollector::load_imatrix(const char * fname) {
bool IMatrixCollector::load_imatrix(const char * fname, std::vector<tensor_statistics> * tstats) {
std::ifstream in(fname, std::ios::binary);
if (!in) {
LOG_ERR("%s: failed to open %s\n",__func__, fname);
@@ -338,14 +384,81 @@ bool IMatrixCollector::load_imatrix(const char * fname) {
return false;
}

// Recreate the state as expected by save_imatrix(), and corerct for weighted sum.
// Recreate the state as expected by save_imatrix(), and correct for weighted sum.
std::vector<float> activations;
activations.reserve(nval);
for (int i = 0; i < nval; i++) {
e.values[i] += tmp[i];
e.counts[i] += ncall;
activations.push_back(e.values[i] / e.counts[i]);
}
e.ncall += ncall;

if (tstats) {
float total = std::accumulate(activations.begin(), activations.end(), 0.0f);
float max = * std::max_element(activations.begin(), activations.end());
float min = * std::min_element(activations.begin(), activations.end());
float mean = total / activations.size();
float sq_total = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
float dev = std::sqrt((sq_total / activations.size()) - (mean * mean));

float threshold = min + min * 0.5f;
int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabs(v) <= threshold; });
float active_ratio = 1 - static_cast<float>(inactive_count) / activations.size();

float ent = 0;
if (total > 0) {
for (auto act : activations) {
if (float p = act / total; p > 0) {
ent -= p* std::log2(p);
}
}
}

int z_score = 0;
for (auto act : activations) {
if (float p = (act - mean) / dev; p > 1) {
z_score++;
}
}

tstats->emplace_back();
auto & ts = (*tstats)[i];
ts.tensor = name_as_vec.data();
ts.stats = e;
ts.total_bias = total;
ts.mean_bias = mean;
ts.max_bias = max;
ts.min_bias = min;
ts.elements = static_cast<int>(activations.size());
ts.stddev = dev;
ts.active = active_ratio;
ts.entropy = ent;
ts.zd = static_cast<float>(z_score) / ts.elements;
}
}

if (tstats) {
static const std::regex pattern(R"(blk\.(\d+)\.)");
for (auto & ts : *tstats) {
if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) {
const int blk = std::stoi(match[1]);
std::string tname(ts.tensor);
tname.replace(match.position(1), match.length(1), std::to_string(blk-1));
auto prev = std::find_if(tstats->begin(), tstats->end(), [tname](const tensor_statistics & t) { return t.tensor == tname; });
if (prev != tstats->end()) {
const float dp = std::inner_product(ts.stats.values.begin(), ts.stats.values.end(), prev->stats.values.begin(), 0.0f);
const float curr_mag = std::sqrt(std::inner_product(ts.stats.values.begin(), ts.stats.values.end(), ts.stats.values.begin(), 0.0f));
const float prev_mag = std::sqrt(std::inner_product(prev->stats.values.begin(), prev->stats.values.end(), prev->stats.values.begin(), 0.0f));
const float cs = dp / (curr_mag * prev_mag);
ts.cossim = cs;
}
} else {
ts.cossim = 0;
}
}
}

return true;
}
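
In formula form, the per-tensor columns computed in load_imatrix above reduce to the following, writing a_i = values[i] / counts[i] for the mean activation of element i and N for the element count (LaTeX notation):

\Sigma(\mathrm{Bias}) = \sum_i a_i, \qquad \mu = \frac{1}{N}\sum_i a_i, \qquad \sigma = \sqrt{\tfrac{1}{N}\sum_i a_i^2 - \mu^2}

\%\,\mathrm{Active} = \frac{|\{\, i : |a_i| > 1.5 \min_j a_j \,\}|}{N}, \qquad \mathrm{ZD} = \frac{|\{\, i : (a_i - \mu)/\sigma > 1 \,\}|}{N}

H = -\sum_i p_i \log_2 p_i \ \text{with}\ p_i = a_i / \Sigma(\mathrm{Bias}), \qquad E_{\mathrm{norm}} = H / \log_2 N

\mathrm{CosSim} = \frac{\langle v_l, v_{l-1} \rangle}{\lVert v_l \rVert \, \lVert v_{l-1} \rVert}, where v_l and v_{l-1} are the raw accumulated values of the same tensor in layers blk.l and blk.(l-1).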

@@ -355,7 +468,6 @@ static bool ik_collect_imatrix(struct ggml_tensor * t, bool ask, void * user_dat
return g_collector.collect_imatrix(t, ask, user_data);
}


struct results_log_softmax {
double log_softmax;
float logit;
@@ -592,6 +704,104 @@ int main(int argc, char ** argv) {
return 1;
}

std::vector<tensor_statistics> ts;

if (params.show_statistics) {
if (params.in_files.empty() || params.in_files.size() > 1) {
LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n");
return 1;
}
if (!g_collector.load_imatrix(params.in_files[0].c_str(), & ts)) {
LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str());
return 1;
}
if (ts.empty()) {
LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str());
return 1;
}

struct tensor_comparer {
bool operator()(const tensor_statistics & a, const tensor_statistics & b) const {
std::string layer, name_a, name_b;
process_tensor_name(a.tensor, layer, name_a);
process_tensor_name(b.tensor, layer, name_b);
return name_a < name_b || (name_a == name_b && a.total_bias > b.total_bias);
}
};
std::sort(ts.begin(), ts.end(), tensor_comparer());

struct weighted_stats {
float weighted_bias = 0.0f;
float weighted_zd = 0.0f;
float weighted_cossim = 0.0f;
int total_elements = 0;
};
std::map<int, weighted_stats> ws;

LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
LOG_INF("\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
" Layer", " Tensor", " Σ(Bias)", " Min", " Max", " μ", " σ", " % Active", "N", " Entropy", "E (norm)", "ZD", " CosSim");
LOG_INF("=========================================================================================================================================================================\n");
for (const auto & tstat : ts) {
std::string layer, name;
process_tensor_name(tstat.tensor, layer, name);

int blk;
try {
blk = std::stoi(layer);
} catch (const std::exception & e) {
blk = -1; // not a block layer
}

LOG_INF("%5s\t%-20s\t%10.2f\t%8.4f\t%11.4f\t%6.2f\t%6.2f\t%8.2f%%\t%6d\t%10.4f\t%6.2f%%\t%10.2f%%\t%8.4f\n",
layer.c_str(), name.c_str(), tstat.total_bias, tstat.min_bias, tstat.max_bias, tstat.mean_bias, tstat.stddev,
tstat.active * 100.0f, tstat.elements, tstat.entropy, 100.0f * (tstat.entropy / std::log2(tstat.elements)),
100.0f * tstat.zd, tstat.cossim);

const float weighted_bias = tstat.elements * tstat.total_bias;
const float weighted_zd = tstat.elements * tstat.zd;
const float weighted_cossim = tstat.elements * tstat.cossim;

if (ws.find(blk) != ws.end()) {
ws[blk].weighted_bias += weighted_bias;
ws[blk].weighted_zd += weighted_zd;
ws[blk].weighted_cossim += weighted_cossim;
ws[blk].total_elements += tstat.elements;
} else {
weighted_stats temp_ws;
temp_ws.weighted_bias = weighted_bias;
temp_ws.weighted_zd = weighted_zd;
temp_ws.weighted_cossim = weighted_cossim;
temp_ws.total_elements = tstat.elements;
ws[blk] = temp_ws;
}
}

const int layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; });
LOG_INF("\nComputing weighted average statistics per layer (%d layers)\n", layers);
LOG_INF("\n%s\t%s\t%s\t%s\n", " Layer", " μΣ(Bias)", " μZD", "μCosSim");
LOG_INF("===============================================\n");

for (const auto & [first, second] : ws) {
const auto & layer = first;
const auto & stats = second;

if (stats.total_elements == 0) continue;

if (layer >= 0) {
const float bias = stats.weighted_bias / stats.total_elements;
const float zd = stats.weighted_zd / stats.total_elements;
const float cossim = stats.weighted_cossim / stats.total_elements;

LOG_INF("%5d\t%14.2f\t%10.4f%%\t%6.4f\n", layer, bias, 100.0f * zd, cossim);
}
}

LOG_INF("\n");

return 0;
}

common_init();

params.n_batch = std::min(params.n_batch, params.n_ctx);
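
The per-layer summary printed at the end of the statistics block in main() above is an element-count-weighted mean of the per-tensor figures; for a layer l containing tensors t with N_t elements:

\mu\Sigma(\mathrm{Bias})_l = \frac{\sum_{t \in l} N_t \cdot \Sigma(\mathrm{Bias})_t}{\sum_{t \in l} N_t}

with \mu\mathrm{ZD}_l and \mu\mathrm{CosSim}_l computed the same way from the per-tensor ZD and CosSim values.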