forked from pool/pacemaker
* bug-806256_pacemaker-log-level-notice.patch
* pacemaker-NodeUtilization-RA.patch
* pacemaker-colocated-utilization.patch
- crmd: All peers need to track node shutdown requests (bsc#917625)
- crmd: Cached copies of transient attributes cease to be valid once a node leaves the membership (bsc#917625)
- crmd: Wait for all pending operations to complete before poking the policy engine
- lrmd: preserve exit reason string when isolation wrappers are in use
- docker-wrapper: properly separate docker and resource specific attributes
- docker-wrapper: set authkey file permissions and properly set container 'node name' during start
- systemd: Trick systemd into not stopping our services before us during shutdown
- mcp: Allow a configurable delay when debugging shutdown issues
- systemd: Kindly ask dbus NOT to kill the process if the dbus connection fails
- systemd: Tell systemd not to take DBus down from underneath us
- cib: Correctly set up signal handlers
- PE: Do not record duplicate copies of the failed actions
- lrmd: enable ipc proxy for docker-wrapper privileged mode
- lrmd: properly handle poke requests in lrmd client when using ipc
- spec: add docker-wrapper directory to spec file
- pengine: disable migrations for resources with isolation containers
- pengine: disable reloading of resources within isolated container wrappers
- pengine: ability to launch resources in isolated containers
- extra: docker container tech wrapper script for pcmk remote
- tools: crm_mon prints Stopped clones only if --inactive was specified
- tools: display node names more consistently in crm_mon output
- tools: Improve crm_mon output with certain option combinations
- tools: make crm_mon last updated header consistent across formats
- remote: pcmk remote client tool for use with container wrapper script
- crmd: Reset stonith failcount to recover transitioner when the node rejoins (bsc#921102)
- systemd: fix crash caused when canceling in-flight operation
OBS-URL:
532 lines
20 KiB
532 lines
20 KiB
commit 3738e2d5dd80146afb0427f96df786f8fa7f09b3
Author: Gao,Yan <>
Date: Mon Jan 7 03:01:40 2013 +0800
High: PE: cl#5130 - Improve the placement for colocated utilization resources
Index: pacemaker/pengine/group.c
--- pacemaker.orig/pengine/group.c
+++ pacemaker/pengine/group.c
@@ -515,3 +515,62 @@ void
group_append_meta(resource_t * rsc, xmlNode * xml)
+/* Collect the resources that have to be placed together with group 'rsc'.
+ *
+ * If group_data->colocated is set, or the group's parent is a clone or a
+ * master, the colocation constraints of every child are searched; otherwise
+ * only those of the first child are.  The constraints of the group itself
+ * are searched in either case.
+ *
+ * 'from_rsc' and 'orig_rsc' are handed to find_colocated_rscs() unchanged.
+ * Returns the (possibly extended) 'colocated_rscs' list.
+ */
+GListPtr
+group_find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc,
+                          resource_t * from_rsc, resource_t * orig_rsc)
+{
+    group_variant_data_t *group_data = NULL;
+
+    get_group_variant_data(group_data, rsc);
+    if (group_data->colocated ||
+        (rsc->parent &&
+         (rsc->parent->variant == pe_clone || rsc->parent->variant == pe_master))) {
+        GListPtr gIter = rsc->children;
+
+        for (; gIter != NULL; gIter = gIter->next) {
+            resource_t *child_rsc = (resource_t *) gIter->data;
+
+            colocated_rscs = find_colocated_rscs(colocated_rscs, child_rsc, from_rsc, orig_rsc);
+        }
+
+    } else {
+        if (group_data->first_child) {
+            colocated_rscs = find_colocated_rscs(colocated_rscs, group_data->first_child, from_rsc, orig_rsc);
+        }
+    }
+
+    colocated_rscs = find_colocated_rscs(colocated_rscs, rsc, from_rsc, orig_rsc);
+
+    return colocated_rscs;
+}
+
+/* Add the utilization of the still-provisional members of group 'rsc' to
+ * 'all_utilization', skipping members that already appear in 'all_rscs'.
+ *
+ * If group_data->colocated is set, or the group's parent is a clone or a
+ * master, every child is considered; otherwise only the first child is.
+ */
+void
+group_unallocated_utilization_add(GHashTable * all_utilization, resource_t * rsc,
+                                  GListPtr all_rscs)
+{
+    group_variant_data_t *group_data = NULL;
+
+    get_group_variant_data(group_data, rsc);
+    if (group_data->colocated ||
+        (rsc->parent &&
+         (rsc->parent->variant == pe_clone || rsc->parent->variant == pe_master))) {
+        GListPtr gIter = rsc->children;
+
+        for (; gIter != NULL; gIter = gIter->next) {
+            resource_t *child_rsc = (resource_t *) gIter->data;
+
+            /* g_list_find() returns a list element pointer, so test against NULL */
+            if (is_set(child_rsc->flags, pe_rsc_provisional) &&
+                g_list_find(all_rscs, child_rsc) == NULL) {
+                calculate_utilization(all_utilization, child_rsc->utilization, TRUE);
+            }
+        }
+
+    } else {
+        if (group_data->first_child &&
+            is_set(group_data->first_child->flags, pe_rsc_provisional) &&
+            g_list_find(all_rscs, group_data->first_child) == NULL) {
+            calculate_utilization(all_utilization, group_data->first_child->utilization, TRUE);
+        }
+    }
+}
Index: pacemaker/pengine/native.c
--- pacemaker.orig/pengine/native.c
+++ pacemaker/pengine/native.c
@@ -80,7 +80,7 @@ gboolean (*rsc_action_matrix[RSC_ROLE_MA
struct capacity_data {
node_t *node;
- resource_t *rsc;
+ const char *rsc_id;
gboolean is_enough;
@@ -95,30 +95,122 @@ check_capacity(gpointer key, gpointer va
remaining = crm_parse_int(g_hash_table_lookup(data->node->details->utilization, key), "0");
if (required > remaining) {
- CRM_ASSERT(data->rsc);
+ CRM_ASSERT(data->rsc_id);
- pe_rsc_debug(data->rsc,
- "Node %s has no enough %s for resource %s: required=%d remaining=%d",
- data->node->details->uname, (char *)key, data->rsc->id, required, remaining);
+ crm_debug("Node %s has no enough %s for %s: required=%d remaining=%d",
+ data->node->details->uname, (char *)key, data->rsc_id, required, remaining);
data->is_enough = FALSE;
static gboolean
-have_enough_capacity(node_t * node, resource_t * rsc)
+have_enough_capacity(node_t * node, const char * rsc_id, GHashTable * utilization)
struct capacity_data data;
data.node = node;
- data.rsc = rsc;
+ data.rsc_id = rsc_id;
data.is_enough = TRUE;
- g_hash_table_foreach(rsc->utilization, check_capacity, &data);
+ g_hash_table_foreach(utilization, check_capacity, &data);
return data.is_enough;
+/* Sum the utilization that is still to be allocated for 'rsc' together with
+ * the resources in 'colocated_rscs'.
+ *
+ * Only resources flagged pe_rsc_provisional are counted.  Native resources
+ * contribute their own utilization, groups are handled by
+ * group_unallocated_utilization_add(), and a clone/master contributes the
+ * utilization of a single instance unless one of its children is already
+ * in the list.
+ *
+ * Returns a newly created hash table; the caller has to destroy it.
+ */
+static GHashTable *
+sum_unallocated_utilization(resource_t * rsc, GListPtr colocated_rscs)
+{
+    GListPtr gIter = NULL;
+    GListPtr all_rscs = NULL;
+    GHashTable *all_utilization = g_hash_table_new_full(crm_str_hash, g_str_equal,
+                                                        g_hash_destroy_str, g_hash_destroy_str);
+
+    /* Work on a copy so that 'rsc' can be appended without touching the caller's list */
+    all_rscs = g_list_copy(colocated_rscs);
+    if (g_list_find(all_rscs, rsc) == NULL) {
+        all_rscs = g_list_append(all_rscs, rsc);
+    }
+
+    for (gIter = all_rscs; gIter != NULL; gIter = gIter->next) {
+        resource_t *listed_rsc = (resource_t *) gIter->data;
+
+        if(is_set(listed_rsc->flags, pe_rsc_provisional) == FALSE) {
+            continue;
+        }
+
+        pe_rsc_trace(rsc, "%s: Processing unallocated colocated %s", rsc->id, listed_rsc->id);
+
+        if (listed_rsc->variant == pe_native) {
+            pe_rsc_trace(rsc, "%s: Adding %s as colocated utilization", rsc->id, listed_rsc->id);
+            calculate_utilization(all_utilization, listed_rsc->utilization, TRUE);
+
+        } else if (listed_rsc->variant == pe_group) {
+            pe_rsc_trace(rsc, "%s: Adding %s as colocated utilization", rsc->id, listed_rsc->id);
+            group_unallocated_utilization_add(all_utilization, listed_rsc, all_rscs);
+
+        } else if (listed_rsc->variant == pe_clone ||
+                   listed_rsc->variant == pe_master) {
+            GListPtr gIter1 = NULL;
+            gboolean existing = FALSE;
+            resource_t *first_child = (resource_t *) listed_rsc->children->data;
+
+            /* Check if there's any child already existing in the list */
+            gIter1 = listed_rsc->children;
+            for (; gIter1 != NULL; gIter1 = gIter1->next) {
+                resource_t *child = (resource_t *) gIter1->data;
+
+                if (g_list_find(all_rscs, child)) {
+                    existing = TRUE;
+                    break;
+                }
+            }
+
+            if (existing) {
+                continue;
+
+            } else if (first_child->variant == pe_native) {
+                pe_rsc_trace(rsc, "%s: Adding %s as colocated utilization",
+                             rsc->id, ID(first_child->xml));
+                calculate_utilization(all_utilization, first_child->utilization, TRUE);
+
+            } else if (first_child->variant == pe_group) {
+                GListPtr gIter2 = NULL;
+                resource_t *match_group = NULL;
+
+                /* Check if there's any grandchild already existing in the list */
+                gIter2 = all_rscs;
+                for (; gIter2 != NULL; gIter2 = gIter2->next) {
+                    resource_t *listed_native = (resource_t *) gIter2->data;
+
+                    if (listed_native->variant == pe_native &&
+                        listed_native->parent &&
+                        listed_native->parent->parent == listed_rsc) {
+                        match_group = listed_native->parent;
+                        break;
+                    }
+                }
+
+                /* Decide once, after the search has finished: running this
+                 * inside the loop would add first_child's utilization for
+                 * every non-matching entry and never use match_group.
+                 */
+                if (match_group) {
+                    if(is_set(match_group->flags, pe_rsc_provisional)) {
+                        pe_rsc_trace(rsc, "%s: Adding %s as colocated utilization",
+                                     rsc->id, match_group->id);
+                        group_unallocated_utilization_add(all_utilization, match_group, all_rscs);
+                    }
+
+                } else {
+                    pe_rsc_trace(rsc, "%s: Adding %s as colocated utilization",
+                                 rsc->id, ID(first_child->xml));
+                    group_unallocated_utilization_add(all_utilization, first_child, all_rscs);
+                }
+            }
+        }
+    }
+
+    g_list_free(all_rscs);
+
+    return all_utilization;
+}
static gboolean
native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set)
@@ -137,15 +229,63 @@ native_choose_node(resource_t * rsc, nod
if (safe_str_neq(data_set->placement_strategy, "default")) {
GListPtr gIter = NULL;
+ GListPtr colocated_rscs = NULL;
+ gboolean any_capable = FALSE;
- for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
- node_t *node = (node_t *) gIter->data;
+ colocated_rscs = find_colocated_rscs(colocated_rscs, rsc, NULL, rsc);
+ if (colocated_rscs) {
+ GHashTable *unallocated_utilization = NULL;
+ char *rscs_id = crm_concat(rsc->id, "and its colocated resources", ' ');
+ node_t *most_capable_node = NULL;
+ unallocated_utilization = sum_unallocated_utilization(rsc, colocated_rscs);
+ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
+ node_t *node = (node_t *) gIter->data;
+ if (have_enough_capacity(node, rscs_id, unallocated_utilization)) {
+ any_capable = TRUE;
+ }
+ if (most_capable_node == NULL ||
+ compare_capacity(node, most_capable_node) < 0) {
+ /* < 0 means 'node' is more capable */
+ most_capable_node = node;
+ }
+ }
+ if (any_capable) {
+ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
+ node_t *node = (node_t *) gIter->data;
+ if (have_enough_capacity(node, rscs_id, unallocated_utilization) == FALSE) {
+ pe_rsc_debug(rsc, "Resource %s and its colocated resources cannot be allocated to node %s: no enough capacity",
+ rsc->id, node->details->uname);
+ resource_location(rsc, node, -INFINITY, "__limit_utilization__", data_set);
+ }
+ }
+ } else if (prefer == NULL) {
+ prefer = most_capable_node;
+ }
+ if (unallocated_utilization) {
+ g_hash_table_destroy(unallocated_utilization);
+ }
+ g_list_free(colocated_rscs);
+ free(rscs_id);
+ }
- if (have_enough_capacity(node, rsc) == FALSE) {
- pe_rsc_debug(rsc,
- "Resource %s cannot be allocated to node %s: none of enough capacity",
- rsc->id, node->details->uname);
- resource_location(rsc, node, -INFINITY, "__limit_utilization_", data_set);
+ if (any_capable == FALSE) {
+ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
+ node_t *node = (node_t *) gIter->data;
+ if (have_enough_capacity(node, rsc->id, rsc->utilization) == FALSE) {
+ pe_rsc_debug(rsc, "Resource %s cannot be allocated to node %s: no enough capacity",
+ rsc->id, node->details->uname);
+ resource_location(rsc, node, -INFINITY, "__limit_utilization__", data_set);
+ }
dump_node_scores(alloc_details, rsc, "Post-utilization", rsc->allowed_nodes);
@@ -1367,14 +1507,14 @@ enum filter_colocation_res {
static enum filter_colocation_res
filter_colocation_constraint(resource_t * rsc_lh, resource_t * rsc_rh,
- rsc_colocation_t * constraint)
+ rsc_colocation_t * constraint, gboolean preview)
if (constraint->score == 0) {
return influence_nothing;
/* rh side must be allocated before we can process constraint */
- if (is_set(rsc_rh->flags, pe_rsc_provisional)) {
+ if (preview == FALSE && is_set(rsc_rh->flags, pe_rsc_provisional)) {
return influence_nothing;
@@ -1387,7 +1527,7 @@ filter_colocation_constraint(resource_t
return influence_rsc_priority;
- if (is_not_set(rsc_lh->flags, pe_rsc_provisional)) {
+ if (preview == FALSE && is_not_set(rsc_lh->flags, pe_rsc_provisional)) {
/* error check */
struct node_shared_s *details_lh;
struct node_shared_s *details_rh;
@@ -1550,7 +1690,7 @@ native_rsc_colocation_rh(resource_t * rs
- filter_results = filter_colocation_constraint(rsc_lh, rsc_rh, constraint);
+ filter_results = filter_colocation_constraint(rsc_lh, rsc_rh, constraint, FALSE);
pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d, filter=%d)",
constraint->score >= 0 ? "" : "Anti-",
rsc_lh->id, rsc_rh->id, constraint->id, constraint->score, filter_results);
@@ -3307,3 +3447,86 @@ native_append_meta(resource_t * rsc, xml
+/* Append 'rsc' to 'colocated_rscs' and continue the colocation search from it.
+ *
+ * Nothing is appended when 'rsc' is NULL, is 'orig_rsc' itself, or is
+ * already in the list.  Groups are expanded through
+ * group_find_colocated_rscs(); every other variant goes through
+ * find_colocated_rscs().  Returns the (possibly extended) list.
+ */
+static GListPtr
+colocated_rscs_append(GListPtr colocated_rscs, resource_t * rsc,
+                      resource_t * from_rsc, resource_t * orig_rsc)
+{
+    if (rsc == NULL) {
+        return colocated_rscs;
+
+    /* Avoid searching loop */
+    } else if (rsc == orig_rsc) {
+        return colocated_rscs;
+
+    } else if (g_list_find(colocated_rscs, rsc)) {
+        return colocated_rscs;
+    }
+
+    crm_trace("%s: %s is supposed to be colocated with %s", orig_rsc->id, rsc->id, orig_rsc->id);
+    colocated_rscs = g_list_append(colocated_rscs, rsc);
+
+    if (rsc->variant == pe_group) {
+        /* Need to use group_variant_data */
+        colocated_rscs = group_find_colocated_rscs(colocated_rscs, rsc, from_rsc, orig_rsc);
+
+    } else {
+        colocated_rscs = find_colocated_rscs(colocated_rscs, rsc, from_rsc, orig_rsc);
+    }
+
+    return colocated_rscs;
+}
+
+/* Walk the colocation constraints of 'rsc' in both directions (rsc_cons and
+ * rsc_cons_lhs) and append every partner whose constraint has a score of
+ * INFINITY and for which filter_colocation_constraint(), called in preview
+ * mode, reports influence_rsc_location.
+ *
+ * 'from_rsc' (if not NULL) and 'orig_rsc' are never followed, which keeps
+ * the search from going back or looping.  Returns the (possibly extended)
+ * 'colocated_rscs' list.
+ */
+GListPtr
+find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc,
+                    resource_t * from_rsc, resource_t * orig_rsc)
+{
+    GListPtr gIter = NULL;
+
+    for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
+        rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
+        resource_t *rsc_rh = constraint->rsc_rh;
+
+        /* Avoid going back */
+        if (from_rsc && rsc_rh == from_rsc) {
+            continue;
+        }
+
+        /* Break colocation loop */
+        if (rsc_rh == orig_rsc) {
+            continue;
+        }
+
+        if (constraint->score == INFINITY &&
+            filter_colocation_constraint(rsc, rsc_rh, constraint, TRUE) == influence_rsc_location) {
+            colocated_rscs = colocated_rscs_append(colocated_rscs, rsc_rh, rsc, orig_rsc);
+        }
+    }
+
+    for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
+        rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
+        resource_t *rsc_lh = constraint->rsc_lh;
+
+        /* Avoid going back */
+        if (from_rsc && rsc_lh == from_rsc) {
+            continue;
+        }
+
+        /* Break colocation loop */
+        if (rsc_lh == orig_rsc) {
+            continue;
+        }
+
+        if (rsc_lh->variant <= pe_group && rsc->variant > pe_group) {
+            /* We do not know if rsc_lh will be colocated with orig_rsc in this case */
+            continue;
+        }
+
+        if (constraint->score == INFINITY &&
+            filter_colocation_constraint(rsc_lh, rsc, constraint, TRUE) == influence_rsc_location) {
+            colocated_rscs = colocated_rscs_append(colocated_rscs, rsc_lh, rsc, orig_rsc);
+        }
+    }
+
+    return colocated_rscs;
+}
Index: pacemaker/pengine/utils.c
--- pacemaker.orig/pengine/utils.c
+++ pacemaker/pengine/utils.c
@@ -164,7 +164,7 @@ do_compare_capacity2(gpointer key, gpoin
/* rc < 0 if 'node1' has more capacity remaining
* rc > 0 if 'node1' has less capacity remaining
-static int
+int
compare_capacity(const node_t * node1, const node_t * node2)
struct compare_data data;
@@ -269,44 +269,41 @@ sort_node_weight(gconstpointer a, gconst
struct calculate_data {
- node_t *node;
- gboolean allocate;
+ GHashTable *current_utilization;
+ gboolean plus;
static void
do_calculate_utilization(gpointer key, gpointer value, gpointer user_data)
- const char *capacity = NULL;
- char *remain_capacity = NULL;
+ const char *current = NULL;
+ char *result = NULL;
struct calculate_data *data = user_data;
- capacity = g_hash_table_lookup(data->node->details->utilization, key);
- if (capacity) {
- if (data->allocate) {
- remain_capacity = crm_itoa(crm_parse_int(capacity, "0") - crm_parse_int(value, "0"));
- } else {
- remain_capacity = crm_itoa(crm_parse_int(capacity, "0") + crm_parse_int(value, "0"));
- }
- g_hash_table_replace(data->node->details->utilization, strdup(key), remain_capacity);
+ current = g_hash_table_lookup(data->current_utilization, key);
+ if (data->plus) {
+ result = crm_itoa(crm_parse_int(current, "0") + crm_parse_int(value, "0"));
+ g_hash_table_replace(data->current_utilization, strdup(key), result);
+ } else if (current) {
+ result = crm_itoa(crm_parse_int(current, "0") - crm_parse_int(value, "0"));
+ g_hash_table_replace(data->current_utilization, strdup(key), result);
-/* Specify 'allocate' to TRUE when allocating
- * Otherwise to FALSE when deallocating
+/* Pass 'plus' as FALSE to subtract 'utilization' from 'current_utilization'
+ * (allocating), or as TRUE to add it (deallocating or summing utilization)
-static void
-calculate_utilization(node_t * node, resource_t * rsc, gboolean allocate)
+void
+calculate_utilization(GHashTable * current_utilization,
+ GHashTable * utilization, gboolean plus)
struct calculate_data data;
- data.node = node;
- data.allocate = allocate;
+ data.current_utilization = current_utilization;
+ data.plus = plus;
- g_hash_table_foreach(rsc->utilization, do_calculate_utilization, &data);
- if (allocate) {
- dump_rsc_utilization(show_utilization ? 0 : utilization_log_level, __FUNCTION__, rsc, node);
- }
+ g_hash_table_foreach(utilization, do_calculate_utilization, &data);
@@ -322,7 +319,7 @@ native_deallocate(resource_t * rsc)
old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, rsc);
/* old->count--; */
- calculate_utilization(old, rsc, FALSE);
+ calculate_utilization(old->details->utilization, rsc->utilization, TRUE);
@@ -389,7 +386,9 @@ native_assign_node(resource_t * rsc, GLi
chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, rsc);
- calculate_utilization(chosen, rsc, TRUE);
+ calculate_utilization(chosen->details->utilization, rsc->utilization, FALSE);
+ dump_rsc_utilization(show_utilization ? 0 : utilization_log_level, __FUNCTION__, rsc, chosen);
return TRUE;
Index: pacemaker/pengine/utils.h
--- pacemaker.orig/pengine/utils.h
+++ pacemaker/pengine/utils.h
@@ -55,6 +55,19 @@ extern gboolean can_run_any(GHashTable *
extern resource_t *find_compatible_child(resource_t * local_child, resource_t * rsc,
enum rsc_role_e filter, gboolean current);
+extern int compare_capacity(const node_t * node1, const node_t * node2);
+extern void calculate_utilization(GHashTable * current_utilization,
+ GHashTable * utilization, gboolean plus);
+extern GListPtr find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc,
+ resource_t * from_rsc, resource_t * orig_rsc);
+extern GListPtr group_find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc,
+ resource_t * from_rsc, resource_t * orig_rsc);
+extern void group_unallocated_utilization_add(GHashTable *all_utilization, resource_t * rsc,
+ GListPtr all_rscs);
# define STONITH_UP "stonith_up"
# define STONITH_DONE "stonith_complete"
# define ALL_STOPPED "all_stopped"