140 lines
4.8 KiB
Diff
140 lines
4.8 KiB
Diff
|
# Commit 1d80765b504b34b63a42a63aff4291e07e29f0c5
|
||
|
# Date 2013-03-12 15:34:22 +0100
|
||
|
# Author Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||
|
# Committer Jan Beulich <jbeulich@suse.com>
|
||
|
powernow: add fixups for AMD P-state figures
|
||
|
|
||
|
In the Linux kernel, these two git commits:
|
||
|
|
||
|
- f594065faf4f9067c2283a34619fc0714e79a98d
|
||
|
ACPI: Add fixups for AMD P-state figures
|
||
|
- 9855d8ce41a7801548a05d844db2f46c3e810166
|
||
|
ACPI: Check MSR valid bit before using P-state frequencies
|
||
|
|
||
|
Try to fix the issue that "some AMD systems may round the
|
||
|
frequencies in ACPI tables to 100MHz boundaries. We can obtain the real
|
||
|
frequencies from MSRs, so add a quirk to fix these frequencies up
|
||
|
on AMD systems." (from f594065..)
|
||
|
|
||
|
In discussion (around 9855d8..) "it turned out that indeed real
|
||
|
HW/BIOSes may choose to not set the valid bit and thus mark the
|
||
|
P-state as invalid. So this could be considered a fix for broken
|
||
|
BIOSes." (from 9855d8..)
|
||
|
|
||
|
which is great for Linux. Unfortunately the Linux kernel, when
|
||
|
it tries to do the RDMSR under Xen it fails to get the right
|
||
|
value (it gets zero) as Xen traps it and returns zero. Hence
|
||
|
when dom0 uploads the P-states they will be unmodified and
|
||
|
we should take care of updating the frequencies with the right
|
||
|
values.
|
||
|
|
||
|
I've tested it under Dell Inc. PowerEdge T105 /0RR825, BIOS 1.3.2
|
||
|
08/20/2008 where this quirk can be observed (x86 == 0x10, model == 2).
|
||
|
Also on other AMD (x86 == 0x12, A8-3850; x86 == 0x14, AMD E-350) to
|
||
|
make sure the quirk is not applied there.
|
||
|
|
||
|
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||
|
Acked-by: stefan.bader@canonical.com
|
||
|
|
||
|
Do the MSR access here (and while at it, also the one reading
|
||
|
MSR_PSTATE_CUR_LIMIT) on the target CPU, and bound the loop over
|
||
|
amd_fixup_frequency() by max_hw_pstate (matching the one in
|
||
|
powernow_cpufreq_cpu_init()).
|
||
|
|
||
|
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||
|
|
||
|
--- a/xen/arch/x86/acpi/cpufreq/powernow.c
|
||
|
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c
|
||
|
@@ -159,6 +159,51 @@ static int powernow_cpufreq_target(struc
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
+static void amd_fixup_frequency(struct xen_processor_px *px)
|
||
|
+{
|
||
|
+ u32 hi, lo, fid, did;
|
||
|
+ int index = px->control & 0x00000007;
|
||
|
+ const struct cpuinfo_x86 *c = &current_cpu_data;
|
||
|
+
|
||
|
+ if ((c->x86 != 0x10 || c->x86_model >= 10) && c->x86 != 0x11)
|
||
|
+ return;
|
||
|
+
|
||
|
+ rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
|
||
|
+ /*
|
||
|
+ * MSR C001_0064+:
|
||
|
+ * Bit 63: PstateEn. Read-write. If set, the P-state is valid.
|
||
|
+ */
|
||
|
+ if (!(hi & (1U << 31)))
|
||
|
+ return;
|
||
|
+
|
||
|
+ fid = lo & 0x3f;
|
||
|
+ did = (lo >> 6) & 7;
|
||
|
+ if (c->x86 == 0x10)
|
||
|
+ px->core_frequency = (100 * (fid + 16)) >> did;
|
||
|
+ else
|
||
|
+ px->core_frequency = (100 * (fid + 8)) >> did;
|
||
|
+}
|
||
|
+
|
||
|
+struct amd_cpu_data {
|
||
|
+ struct processor_performance *perf;
|
||
|
+ u32 max_hw_pstate;
|
||
|
+};
|
||
|
+
|
||
|
+static void get_cpu_data(void *arg)
|
||
|
+{
|
||
|
+ struct amd_cpu_data *data = arg;
|
||
|
+ struct processor_performance *perf = data->perf;
|
||
|
+ uint64_t msr_content;
|
||
|
+ unsigned int i;
|
||
|
+
|
||
|
+ rdmsrl(MSR_PSTATE_CUR_LIMIT, msr_content);
|
||
|
+ data->max_hw_pstate = (msr_content & HW_PSTATE_MAX_MASK) >>
|
||
|
+ HW_PSTATE_MAX_SHIFT;
|
||
|
+
|
||
|
+ for (i = 0; i < perf->state_count && i <= data->max_hw_pstate; i++)
|
||
|
+ amd_fixup_frequency(&perf->states[i]);
|
||
|
+}
|
||
|
+
|
||
|
static int powernow_cpufreq_verify(struct cpufreq_policy *policy)
|
||
|
{
|
||
|
struct acpi_cpufreq_data *data;
|
||
|
@@ -205,8 +250,7 @@ static int powernow_cpufreq_cpu_init(str
|
||
|
struct acpi_cpufreq_data *data;
|
||
|
unsigned int result = 0;
|
||
|
struct processor_performance *perf;
|
||
|
- u32 max_hw_pstate;
|
||
|
- uint64_t msr_content;
|
||
|
+ struct amd_cpu_data info;
|
||
|
struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
|
||
|
|
||
|
data = xzalloc(struct acpi_cpufreq_data);
|
||
|
@@ -217,7 +261,7 @@ static int powernow_cpufreq_cpu_init(str
|
||
|
|
||
|
data->acpi_data = &processor_pminfo[cpu]->perf;
|
||
|
|
||
|
- perf = data->acpi_data;
|
||
|
+ info.perf = perf = data->acpi_data;
|
||
|
policy->shared_type = perf->shared_type;
|
||
|
|
||
|
if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
|
||
|
@@ -239,8 +283,6 @@ static int powernow_cpufreq_cpu_init(str
|
||
|
result = -ENODEV;
|
||
|
goto err_unreg;
|
||
|
}
|
||
|
- rdmsrl(MSR_PSTATE_CUR_LIMIT, msr_content);
|
||
|
- max_hw_pstate = (msr_content & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
|
||
|
|
||
|
if (perf->control_register.space_id != perf->status_register.space_id) {
|
||
|
result = -ENODEV;
|
||
|
@@ -265,8 +307,10 @@ static int powernow_cpufreq_cpu_init(str
|
||
|
|
||
|
policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
|
||
|
|
||
|
+ on_selected_cpus(cpumask_of(cpu), get_cpu_data, &info, 1);
|
||
|
+
|
||
|
/* table init */
|
||
|
- for (i = 0; i < perf->state_count && i <= max_hw_pstate; i++) {
|
||
|
+ for (i = 0; i < perf->state_count && i <= info.max_hw_pstate; i++) {
|
||
|
if (i > 0 && perf->states[i].core_frequency >=
|
||
|
data->freq_table[valid_states-1].frequency / 1000)
|
||
|
continue;
|