Precompute partitions Currently, we calculate the duplicate chain partitions in partition_dups_1. This is done twice, once for phase 1 and once for phase 2. Instead, calculate the partitions once in partition_dups, and use those for both calls to partition_dups_1. Performance tested on cc1. This causes a small performance regresssion: ... user: series: 5140 5100 5120 5130 5150 5160 5160 5180 5170 5130 \ 5180 5160 5090 5230 5140 5140 5210 5100 5170 5130 mean: 5149.50 (100%) stddev: 35.46 series: 5120 5190 5230 5190 5200 5160 5170 5210 5270 5180 \ 5270 5240 5200 5200 5200 5170 5150 5220 5180 5140 mean: 5194.50 (100.87%) stddev: 39.13 ... There's no significant increase of memory usage: ... mem: series: 1260512 1260456 1260492 1260368 1260608 1260268 1260656 1260488 \ 1260420 1260332 1260464 1260488 1260536 1260340 1260352 1260492 \ 1260268 1260276 1260316 1260316 mean: 1260422.40 (100%) stddev: 113.73 series: 1260456 1260296 1260244 1260360 1260584 1260344 1260548 1260388 \ 1260424 1260304 1260252 1260560 1260664 1260476 1260480 1260416 \ 1260580 1260504 1260604 1260324 mean: 1260440.40 (100.00%) ... We accept the small performance penalty because this patch is a prerequisite for the PR25424 bug fix. 2021-02-25 Tom de Vries * dwz.c (calculate_partitions): New function, factored out of ... (partition_dups_1): ... here. Drop vec_size parameter. Add nr_partitions and partitions parameter. Iterate over partitions array. (partition_dups): Call calculate_partitions. Update calls to partition_dups_1. --- dwz.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 22 deletions(-) diff --git a/dwz.c b/dwz.c index e71b3fa..f435428 100644 --- a/dwz.c +++ b/dwz.c @@ -7989,34 +7989,26 @@ cnt_ref_cus (dw_die_ref ref) multiple CUs might be worthwhile to be moved into partial units, construct those partial units. */ static bool -partition_dups_1 (dw_die_ref *arr, size_t vec_size, +partition_dups_1 (dw_die_ref *arr, size_t nr_partitions, size_t *partitions, dw_cu_ref *first_partial_cu, dw_cu_ref *last_partial_cu, bool second_phase) { - size_t i, j; + size_t i, j, cnt; bool ret = false; - for (i = 0; i < vec_size; i = j) + size_t idx = 0; + for (idx = 0; idx < nr_partitions * 2; idx += 2) { + i = partitions[idx]; + cnt = partitions[idx + 1]; + j = partitions[idx + 2]; + + if (arr[i]->die_dup != NULL) + continue; + dw_die_ref ref; - size_t cnt = 0, size = 0, k, orig_size, new_size, namespaces = 0; + size_t size = 0, k, orig_size, new_size, namespaces = 0; unsigned int force = 0; - if (arr[i]->die_dup != NULL) - { - j = i + 1; - continue; - } - for (j = i + 1; j < vec_size; j++) - { - size_t this_cnt; - if (!same_ref_cus_p (arr[i], arr[j], &this_cnt)) - break; - cnt = this_cnt; - } - if (stats_p && !second_phase) - stats->part_cnt++; - if (cnt == 0) - cnt = cnt_ref_cus (arr[i]); enum dwarf_source_language part_lang = gen_cu_p ? partition_lang (arr[i]) : 0; for (k = i; k < j; k++) @@ -8289,6 +8281,36 @@ partition_dups_1 (dw_die_ref *arr, size_t vec_size, return ret; } +/* Partition the duplicate chains in array ARR with size VEC_SIZE, and store + the partitions on obstack ob2, with for each partition two entries: + the start and the number of unique reffer CUs. */ +static void +calculate_partitions (dw_die_ref *arr, size_t vec_size) +{ + size_t i, j; + for (i = 0; i < vec_size; i = j) + { + size_t cnt = 0; + for (j = i + 1; j < vec_size; j++) + { + size_t this_cnt; + if (!same_ref_cus_p (arr[i], arr[j], &this_cnt)) + break; + cnt = this_cnt; + } + if (cnt == 0) + cnt = cnt_ref_cus (arr[i]); + obstack_grow (&ob2, &i, sizeof (size_t)); + obstack_grow (&ob2, &cnt, sizeof (size_t)); + } + + /* Add element to mark end of partition list. This allows us to do + 'j = partitions[idx + 2]' for all partitions. */ + obstack_grow (&ob2, &j, sizeof (size_t)); + size_t zero = 0; + obstack_grow (&ob2, &zero, sizeof (size_t)); +} + static inline void FORCE_INLINE reset_die_ref_seen (void) { @@ -8443,7 +8465,16 @@ partition_dups (void) report_progress (); fprintf (stderr, "partition_dups after qsort\n"); } - if (partition_dups_1 (arr, vec_size, &first_partial_cu, + + size_t *partitions = (size_t *) obstack_base (&ob2); + calculate_partitions (arr, vec_size); + size_t nr_partitions + = (obstack_object_size (&ob2) / sizeof (size_t)) / 2 - 1; + partitions = (size_t *) obstack_finish (&ob2); + if (stats_p) + stats->part_cnt += nr_partitions; + + if (partition_dups_1 (arr, nr_partitions, partitions, &first_partial_cu, &last_partial_cu, false)) { for (i = 0; i < vec_size; i++) @@ -8452,7 +8483,7 @@ partition_dups (void) if (arr[i]->die_dup != NULL) mark_refs (die_cu (arr[i]), arr[i], arr[i], MARK_REFS_FOLLOW_DUPS); - partition_dups_1 (arr, vec_size, &first_partial_cu, + partition_dups_1 (arr, nr_partitions, partitions, &first_partial_cu, &last_partial_cu, true); for (i = 0; i < vec_size; i++) arr[i]->die_ref_seen = 0;