commit 3738e2d5dd80146afb0427f96df786f8fa7f09b3
Author: Gao,Yan <ygao@suse.com>
Date:   Mon Jan 7 03:01:40 2013 +0800

    High: PE: cl#5130 - Improve the placement for colocated utilization resources

Index: pacemaker/pengine/group.c
===================================================================
--- pacemaker.orig/pengine/group.c
+++ pacemaker/pengine/group.c
@@ -515,3 +515,62 @@ void
 group_append_meta(resource_t * rsc, xmlNode * xml)
 {
 }
+
+GListPtr
+group_find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc,
+                          resource_t * from_rsc, resource_t * orig_rsc)
+{
+    group_variant_data_t *group_data = NULL;
+    /* Extend colocated_rscs via the group's members, then via the group itself; returns the list */
+    get_group_variant_data(group_data, rsc);
+    if (group_data->colocated ||
+        (rsc->parent &&
+         (rsc->parent->variant == pe_clone || rsc->parent->variant == pe_master))) {
+        GListPtr gIter = rsc->children;
+        /* Colocated group, or group whose parent is a clone/master: search from every member */
+        for (; gIter != NULL; gIter = gIter->next) {
+            resource_t *child_rsc = (resource_t *) gIter->data;
+
+            colocated_rscs = find_colocated_rscs(colocated_rscs, child_rsc, from_rsc, orig_rsc);
+        }
+    /* Otherwise only the first member's constraints are searched */
+    } else {
+        if (group_data->first_child) {
+            colocated_rscs = find_colocated_rscs(colocated_rscs, group_data->first_child, from_rsc, orig_rsc);
+        }
+    }
+    /* In both cases, also search the constraints attached to the group resource itself */
+    colocated_rscs = find_colocated_rscs(colocated_rscs, rsc, from_rsc, orig_rsc);
+
+    return colocated_rscs;
+}
+
+void
+group_unallocated_utilization_add(GHashTable * all_utilization, resource_t * rsc,
+                                  GListPtr all_rscs)
+{
+    group_variant_data_t *group_data = NULL;
+    /* Add to all_utilization the utilization of provisional members that are not in all_rscs */
+    get_group_variant_data(group_data, rsc);
+    if (group_data->colocated ||
+        (rsc->parent &&
+         (rsc->parent->variant == pe_clone || rsc->parent->variant == pe_master))) {
+        GListPtr gIter = rsc->children;
+        /* Colocated group, or group whose parent is a clone/master: consider every member */
+        for (; gIter != NULL; gIter = gIter->next) {
+            resource_t *child_rsc = (resource_t *) gIter->data;
+            /* Members found in all_rscs are skipped, presumably so they are not counted twice */
+            if (is_set(child_rsc->flags, pe_rsc_provisional) &&
+                g_list_find(all_rscs, child_rsc) == FALSE) {
+                calculate_utilization(all_utilization, child_rsc->utilization, TRUE);
+            }
+        }
+    /* Otherwise only the first member is considered, under the same two conditions */
+    } else {
+        if (group_data->first_child &&
+            is_set(group_data->first_child->flags, pe_rsc_provisional) &&
+            g_list_find(all_rscs, group_data->first_child) == FALSE) {
+            calculate_utilization(all_utilization, group_data->first_child->utilization, TRUE);
+        }
+    }
+}
Index: pacemaker/pengine/native.c
===================================================================
--- pacemaker.orig/pengine/native.c
+++ pacemaker/pengine/native.c
@@ -80,7 +80,7 @@ gboolean (*rsc_action_matrix[RSC_ROLE_MA
 
 struct capacity_data {
     node_t *node;
-    resource_t *rsc;
+    const char *rsc_id;         /* label that check_capacity prints in its debug message */
     gboolean is_enough;
 };
 
@@ -95,27 +95,119 @@ check_capacity(gpointer key, gpointer va
     remaining = crm_parse_int(g_hash_table_lookup(data->node->details->utilization, key), "0");
 
     if (required > remaining) {
-        pe_rsc_debug(data->rsc,
-                     "Node %s has no enough %s for resource %s: required=%d remaining=%d",
-                     data->node->details->uname, (char *)key, data->rsc->id, required, remaining);
+        crm_debug("Node %s has no enough %s for %s: required=%d remaining=%d",
+                  data->node->details->uname, (char *)key, data->rsc_id, required, remaining);
         data->is_enough = FALSE;
     }
 }
 
 static gboolean
-have_enough_capacity(node_t * node, resource_t * rsc)
+have_enough_capacity(node_t * node, const char * rsc_id, GHashTable * utilization)
 {
     struct capacity_data data;
 
     data.node = node;
-    data.rsc = rsc;
+    data.rsc_id = rsc_id;       /* only used as a label in the debug message */
     data.is_enough = TRUE;
 
-    g_hash_table_foreach(rsc->utilization, check_capacity, &data);
+    g_hash_table_foreach(utilization, check_capacity, &data);   /* clears is_enough on any shortfall */
 
     return data.is_enough;
 }
 
+static GHashTable *
+sum_unallocated_utilization(resource_t * rsc, GListPtr colocated_rscs)
+{
+    GListPtr gIter = NULL;
+    GListPtr all_rscs = NULL;
+    GHashTable *all_utilization = g_hash_table_new_full(crm_str_hash, g_str_equal,
+                                          g_hash_destroy_str, g_hash_destroy_str);
+    /* Sum the utilization of rsc and colocated_rscs that are still provisional; caller owns result */
+    all_rscs = g_list_copy(colocated_rscs);
+    if (g_list_find(all_rscs, rsc) == NULL) {
+        all_rscs = g_list_append(all_rscs, rsc);
+    }
+
+    for (gIter = all_rscs; gIter != NULL; gIter = gIter->next) {
+        resource_t *listed_rsc = (resource_t *) gIter->data;
+        /* Resources that are no longer provisional contribute nothing to the sum */
+        if(is_set(listed_rsc->flags, pe_rsc_provisional) == FALSE) {
+            continue;
+        }
+
+        pe_rsc_trace(rsc, "%s: Processing unallocated colocated %s", rsc->id, listed_rsc->id);
+
+        if (listed_rsc->variant == pe_native) {
+            pe_rsc_trace(rsc, "%s: Adding %s as colocated utilization", rsc->id, listed_rsc->id);
+            calculate_utilization(all_utilization, listed_rsc->utilization, TRUE);
+
+        } else if (listed_rsc->variant == pe_group) {
+            pe_rsc_trace(rsc, "%s: Adding %s as colocated utilization", rsc->id, listed_rsc->id);
+            group_unallocated_utilization_add(all_utilization, listed_rsc, all_rscs);
+
+        } else if (listed_rsc->variant == pe_clone ||
+                   listed_rsc->variant == pe_master) {
+            GListPtr gIter1 = NULL;
+            gboolean existing = FALSE;
+            resource_t *first_child = (resource_t *) listed_rsc->children->data;
+
+            /* Check whether any child of this clone is itself present in the list */
+            gIter1 = listed_rsc->children;
+            for (; gIter1 != NULL; gIter1 = gIter1->next) {
+                resource_t *child = (resource_t *) gIter1->data;
+
+                if (g_list_find(all_rscs, child)) {
+                    existing = TRUE;
+                    break;
+                }
+            }
+            /* If so, skip the clone: that child is visited as its own entry of all_rscs */
+            if (existing) {
+                continue;
+
+            } else if (first_child->variant == pe_native) {
+                pe_rsc_trace(rsc, "%s: Adding %s as colocated utilization",
+                             rsc->id, ID(first_child->xml));
+                calculate_utilization(all_utilization, first_child->utilization, TRUE);
+
+            } else if (first_child->variant == pe_group) {
+                GListPtr gIter2 = NULL;
+                resource_t *match_group = NULL;
+
+                /* Look for a listed primitive whose parent group is a child of this clone */
+                gIter2 = all_rscs;
+                for (; gIter2 != NULL; gIter2 = gIter2->next) {
+                    resource_t *listed_native = (resource_t *) gIter2->data;
+
+                    if (listed_native->variant == pe_native &&
+                        listed_native->parent &&
+                        listed_native->parent->parent == listed_rsc) {
+                        match_group = listed_native->parent;
+                        break;
+                    }
+                }
+                /* Decide only after the whole list was scanned, so utilization is added at most once */
+                if (match_group) {
+                    if(is_set(match_group->flags, pe_rsc_provisional)) {
+                        pe_rsc_trace(rsc, "%s: Adding %s as colocated utilization",
+                                     rsc->id, match_group->id);
+                        group_unallocated_utilization_add(all_utilization, match_group, all_rscs);
+                    }
+
+                } else {
+                    pe_rsc_trace(rsc, "%s: Adding %s as colocated utilization",
+                                 rsc->id, ID(first_child->xml));
+                    group_unallocated_utilization_add(all_utilization, first_child, all_rscs);
+                }
+            }
+        }
+    }
+
+    g_list_free(all_rscs);
+
+    return all_utilization;
+}
+
 static gboolean
 native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set)
 {
@@ -137,15 +229,63 @@ native_choose_node(resource_t * rsc, nod
 
     if (safe_str_neq(data_set->placement_strategy, "default")) {
         GListPtr gIter = NULL;
+        GListPtr colocated_rscs = NULL;
+        gboolean any_capable = FALSE;
 
-        for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
-            node_t *node = (node_t *) gIter->data;
+        colocated_rscs = find_colocated_rscs(colocated_rscs, rsc, NULL, rsc);
+        if (colocated_rscs) {
+            GHashTable *unallocated_utilization = NULL;
+            char *rscs_id = crm_concat(rsc->id, "and its colocated resources", ' ');
+            node_t *most_capable_node = NULL;
+
+            unallocated_utilization = sum_unallocated_utilization(rsc, colocated_rscs);
+
+            for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
+                node_t *node = (node_t *) gIter->data;
+
+                if (have_enough_capacity(node, rscs_id, unallocated_utilization)) {
+                    any_capable = TRUE;
+                }
+
+                if (most_capable_node == NULL ||
+                    compare_capacity(node, most_capable_node) < 0) {
+                    /* < 0 means 'node' is more capable */
+                    most_capable_node = node;
+                }
+            }
+
+            if (any_capable) {
+                for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
+                    node_t *node = (node_t *) gIter->data;
+
+                    if (have_enough_capacity(node, rscs_id, unallocated_utilization) == FALSE) {
+                        pe_rsc_debug(rsc, "Resource %s and its colocated resources cannot be allocated to node %s: no enough capacity",
+                                     rsc->id, node->details->uname);
+                        resource_location(rsc, node, -INFINITY, "__limit_utilization__", data_set);
+                    }
+                }
+
+            } else if (prefer == NULL) {
+                prefer = most_capable_node;
+            }
+
+            if (unallocated_utilization) {
+                g_hash_table_destroy(unallocated_utilization);
+            }
+
+            g_list_free(colocated_rscs);
+            free(rscs_id);
+        }
 
-            if (have_enough_capacity(node, rsc) == FALSE) {
-                pe_rsc_debug(rsc,
-                             "Resource %s cannot be allocated to node %s: none of enough capacity",
-                             rsc->id, node->details->uname);
-                resource_location(rsc, node, -INFINITY, "__limit_utilization_", data_set);
+        if (any_capable == FALSE) {
+            for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
+                node_t *node = (node_t *) gIter->data;
+
+                if (have_enough_capacity(node, rsc->id, rsc->utilization) == FALSE) {
+                    pe_rsc_debug(rsc, "Resource %s cannot be allocated to node %s: no enough capacity",
+                                 rsc->id, node->details->uname);
+                    resource_location(rsc, node, -INFINITY, "__limit_utilization__", data_set);
+                }
             }
         }
         dump_node_scores(alloc_details, rsc, "Post-utilization", rsc->allowed_nodes);
@@ -1367,14 +1507,14 @@ enum filter_colocation_res {
 
 static enum filter_colocation_res
 filter_colocation_constraint(resource_t * rsc_lh, resource_t * rsc_rh,
-                             rsc_colocation_t * constraint)
+                             rsc_colocation_t * constraint, gboolean preview)
 {
     if (constraint->score == 0) {
         return influence_nothing;
     }
 
     /* rh side must be allocated before we can process constraint */
-    if (is_set(rsc_rh->flags, pe_rsc_provisional)) {
+    if (preview == FALSE && is_set(rsc_rh->flags, pe_rsc_provisional)) {
         return influence_nothing;
     }
 
@@ -1387,7 +1527,7 @@ filter_colocation_constraint(resource_t
         return influence_rsc_priority;
     }
 
-    if (is_not_set(rsc_lh->flags, pe_rsc_provisional)) {
+    if (preview == FALSE && is_not_set(rsc_lh->flags, pe_rsc_provisional)) {
         /* error check */
         struct node_shared_s *details_lh;
         struct node_shared_s *details_rh;
@@ -1550,7 +1690,7 @@ native_rsc_colocation_rh(resource_t * rs
 {
     enum filter_colocation_res filter_results;
 
-    filter_results = filter_colocation_constraint(rsc_lh, rsc_rh, constraint);
+    filter_results = filter_colocation_constraint(rsc_lh, rsc_rh, constraint, FALSE);
 
     switch (filter_results) {
         case influence_rsc_priority:
@@ -3307,3 +3447,86 @@ native_append_meta(resource_t * rsc, xml
         free(name);
     }
 }
+
+static GListPtr
+colocated_rscs_append(GListPtr colocated_rscs, resource_t * rsc,
+                      resource_t * from_rsc, resource_t * orig_rsc)
+{
+    if (rsc == NULL) {
+        return colocated_rscs;
+
+    /* Never add the resource the search started from (avoids a search loop) */
+    } else if (rsc == orig_rsc) {
+        return colocated_rscs;
+    /* Already collected: nothing to add, and no need to search from it again */
+    } else if (g_list_find(colocated_rscs, rsc)) {
+        return colocated_rscs;
+    }
+    /* Record rsc, then search from it to collect whatever is colocated with it in turn */
+    crm_trace("%s: %s is supposed to be colocated with %s", orig_rsc->id, rsc->id, orig_rsc->id);
+    colocated_rscs = g_list_append(colocated_rscs, rsc);
+
+    if (rsc->variant == pe_group) {
+        /* Groups are expanded by group_find_colocated_rscs(), which reads group_variant_data */
+        colocated_rscs = group_find_colocated_rscs(colocated_rscs, rsc, from_rsc, orig_rsc);
+
+    } else {
+        colocated_rscs = find_colocated_rscs(colocated_rscs, rsc, from_rsc, orig_rsc);
+    }
+
+    return colocated_rscs;
+}
+
+GListPtr
+find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc,
+                    resource_t * from_rsc, resource_t * orig_rsc)
+{
+    GListPtr gIter = NULL;
+    /* Collect resources tied to rsc by INFINITY colocations: first via rsc_cons (rsc_rh side) */
+    for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
+        rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
+        resource_t *rsc_rh = constraint->rsc_rh;
+
+        /* Avoid going back */
+        if (from_rsc && rsc_rh == from_rsc) {
+            continue;
+        }
+
+        /* Break colocation loop */
+        if (rsc_rh == orig_rsc) {
+            continue;
+        }
+        /* preview=TRUE makes the filter skip its pe_rsc_provisional checks */
+        if (constraint->score == INFINITY &&
+            filter_colocation_constraint(rsc, rsc_rh, constraint, TRUE) == influence_rsc_location) {
+            colocated_rscs = colocated_rscs_append(colocated_rscs, rsc_rh, rsc, orig_rsc);
+        }
+    }
+    /* ... then via rsc_cons_lhs (rsc_lh side) */
+    for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
+        rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
+        resource_t *rsc_lh = constraint->rsc_lh;
+
+        /* Avoid going back */
+        if (from_rsc && rsc_lh == from_rsc) {
+            continue;
+        }
+
+        /* Break colocation loop */
+        if (rsc_lh == orig_rsc) {
+            continue;
+        }
+
+        if (rsc_lh->variant <= pe_group && rsc->variant > pe_group) {
+            /* We do not know if rsc_lh will be colocated with orig_rsc in this case */
+            continue;
+        }
+
+        if (constraint->score == INFINITY &&
+            filter_colocation_constraint(rsc_lh, rsc, constraint, TRUE) == influence_rsc_location) {
+            colocated_rscs = colocated_rscs_append(colocated_rscs, rsc_lh, rsc, orig_rsc);
+        }
+    }
+
+    return colocated_rscs;
+}
Index: pacemaker/pengine/utils.c
===================================================================
--- pacemaker.orig/pengine/utils.c
+++ pacemaker/pengine/utils.c
@@ -164,7 +164,7 @@ do_compare_capacity2(gpointer key, gpoin
 /* rc < 0 if 'node1' has more capacity remaining
  * rc > 0 if 'node1' has less capacity remaining
  */
-static int
+int
 compare_capacity(const node_t * node1, const node_t * node2)
 {
     struct compare_data data;
@@ -269,44 +269,41 @@ sort_node_weight(gconstpointer a, gconst
 }
 
 struct calculate_data {
-    node_t *node;
-    gboolean allocate;
+    GHashTable *current_utilization;    /* table that is updated in place */
+    gboolean plus;                      /* TRUE: add the values; FALSE: subtract them */
 };
 
 static void
 do_calculate_utilization(gpointer key, gpointer value, gpointer user_data)
 {
-    const char *capacity = NULL;
-    char *remain_capacity = NULL;
+    const char *current = NULL;
+    char *result = NULL;
     struct calculate_data *data = user_data;
 
-    capacity = g_hash_table_lookup(data->node->details->utilization, key);
-    if (capacity) {
-        if (data->allocate) {
-            remain_capacity = crm_itoa(crm_parse_int(capacity, "0") - crm_parse_int(value, "0"));
-        } else {
-            remain_capacity = crm_itoa(crm_parse_int(capacity, "0") + crm_parse_int(value, "0"));
-        }
-        g_hash_table_replace(data->node->details->utilization, strdup(key), remain_capacity);
+    current = g_hash_table_lookup(data->current_utilization, key);
+    if (data->plus) {   /* adds even when the key is absent ("0" is the default given to crm_parse_int) */
+        result = crm_itoa(crm_parse_int(current, "0") + crm_parse_int(value, "0"));
+        g_hash_table_replace(data->current_utilization, strdup(key), result);
+    /* Subtraction only touches keys that are already present in the table */
+    } else if (current) {
+        result = crm_itoa(crm_parse_int(current, "0") - crm_parse_int(value, "0"));
+        g_hash_table_replace(data->current_utilization, strdup(key), result);
     }
 }
 
-/* Specify 'allocate' to TRUE when allocating
- * Otherwise to FALSE when deallocating
+/* Apply each entry of 'utilization' to 'current_utilization': 'plus' FALSE subtracts
+ * (allocating a resource), 'plus' TRUE adds (deallocating, or summing requirements)
  */
-static void
-calculate_utilization(node_t * node, resource_t * rsc, gboolean allocate)
+void
+calculate_utilization(GHashTable * current_utilization,
+                      GHashTable * utilization, gboolean plus)
 {
     struct calculate_data data;
 
-    data.node = node;
-    data.allocate = allocate;
+    data.current_utilization = current_utilization;
+    data.plus = plus;
 
-    g_hash_table_foreach(rsc->utilization, do_calculate_utilization, &data);
-
-    if (allocate) {
-        dump_rsc_utilization(show_utilization ? 0 : utilization_log_level, __FUNCTION__, rsc, node);
-    }
+    g_hash_table_foreach(utilization, do_calculate_utilization, &data);
 }
 
 void
@@ -322,7 +319,7 @@ native_deallocate(resource_t * rsc)
         old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, rsc);
         old->details->num_resources--;
         /* old->count--; */
-        calculate_utilization(old, rsc, FALSE);
+        calculate_utilization(old->details->utilization, rsc->utilization, TRUE);
         free(old);
     }
 }
@@ -389,7 +386,9 @@ native_assign_node(resource_t * rsc, GLi
     chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, rsc);
     chosen->details->num_resources++;
     chosen->count++;
-    calculate_utilization(chosen, rsc, TRUE);
+    calculate_utilization(chosen->details->utilization, rsc->utilization, FALSE);
+    dump_rsc_utilization(show_utilization ? 0 : utilization_log_level, __FUNCTION__, rsc, chosen);
+
     return TRUE;
 }
 
Index: pacemaker/pengine/utils.h
===================================================================
--- pacemaker.orig/pengine/utils.h
+++ pacemaker/pengine/utils.h
@@ -55,6 +55,19 @@ extern gboolean can_run_any(GHashTable *
 extern resource_t *find_compatible_child(resource_t * local_child, resource_t * rsc,
                                          enum rsc_role_e filter, gboolean current);
 
+extern int compare_capacity(const node_t * node1, const node_t * node2);
+extern void calculate_utilization(GHashTable * current_utilization,
+                                  GHashTable * utilization, gboolean plus);
+/* Colocation search: returns colocated_rscs, possibly extended with newly found resources */
+extern GListPtr find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc, 
+                                    resource_t * from_rsc, resource_t * orig_rsc);
+/* Same search for a group resource; implemented in group.c */
+extern GListPtr group_find_colocated_rscs(GListPtr colocated_rscs, resource_t * rsc,
+                                          resource_t * from_rsc, resource_t * orig_rsc);
+/* Adds the utilization of a group's provisional members not listed in all_rscs */
+extern void group_unallocated_utilization_add(GHashTable *all_utilization, resource_t * rsc,
+                                              GListPtr all_rscs);
+
 #  define STONITH_UP "stonith_up"
 #  define STONITH_DONE "stonith_complete"
 #  define ALL_STOPPED "all_stopped"