lib.pmu: rudimentary support for AMD family 17h CPUs
A machine-readable listing of available PMU events for the various AMD CPU models
was not available, so instead I added a way to specify events by raw code, by
supplying a hexadecimal string literal (e.g., "0x00c0"). That way, lib.pmu
is still useful on AMD CPUs for brave souls armed with a processor manual.
eugeneia committed Jun 15, 2018
1 parent b9da7ca commit 4a40691
Showing 2 changed files with 125 additions and 53 deletions.
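
Illustrative usage sketch (not part of this commit): the snippet below shows how the raw-code mechanism described in the commit message could be used, assuming only the setup() and is_available() entry points visible in the diff below. The event codes 0x00c0 (retired instructions) and 0x0076 (CPU cycles) are the same ones the diff uses as AMD defaults. As is_available() checks, lib.pmu also requires single-core CPU affinity and the Linux msr module.

local pmu = require("lib.pmu")

local ok, why = pmu.is_available()
if ok then
   -- Raw hexadecimal string literals are passed through to the hardware
   -- event-select registers; event names from the built-in tables still
   -- work on recognized Intel CPUs.
   local ndropped = pmu.setup({"0x00c0", "0x0076"})
   assert(ndropped == 0, "too many events for the available counters")
else
   print("PMU not available: " .. why)
end
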
57 changes: 20 additions & 37 deletions src/lib/pmu.lua
@@ -25,16 +25,16 @@ local enabled = nil
-- available on the running CPU.
local function scan_available_counters ()
if defs then return defs end
defs = {}
for i, set in ipairs(pmu_cpu) do
local cpu, version, kind, list = unpack(set)
-- XXX Only supporting "core" counters at present i.e. the
-- counters built into the CPU core.
if cpu == pmu_x86.cpu_model and kind == 'core' then
defs = defs or {}
for k, v in pairs(list) do defs[k] = v end
end
end
defs = defs or false
end

-- Return an array containing the CPUs that we have affinity with.
@@ -53,17 +53,13 @@ function is_available ()
if #cpu_set() ~= 1 then
return false, "single core cpu affinity required"
end
if not S.stat("/dev/cpu/0/msr") then
print("[pmu: /sbin/modprobe msr]")
os.execute("/sbin/modprobe msr")
if not S.stat("/dev/cpu/0/msr") then
return false, "requires /dev/cpu/*/msr (Linux 'msr' module)"
end
if not pmu_x86.enable_msr() then
return false, "requires /dev/cpu/*/msr (Linux 'msr' module)"
end
scan_available_counters()
if not defs then
return false, "CPU not recognized: " .. pmu_x86.cpu_model
if not (pmu_x86.ncounters > 0) then
return false, "CPU not supported: " .. pmu_x86.cpu_model
end
scan_available_counters()
return true
end

@@ -113,47 +109,34 @@ function setup (patterns)
table.insert(set, event)
end
end
table.sort(set)
end
-- Allow selecting events by raw code
for _, pattern in pairs(patterns or {}) do
if pattern:match("^0x") then
table.insert(set, pattern)
end
end
table.sort(set)
local ndropped = math.max(0, #set - pmu_x86.ngeneral)
while (#set - pmu_x86.ngeneral) > 0 do table.remove(set) end
local cpu = cpu_set()[1]
-- All available counters are globally enabled
-- (IA32_PERF_GLOBAL_CTRL).
writemsr(cpu, 0x38f,
bit.bor(bit.lshift(0x3ULL, 32),
bit.lshift(1ULL, pmu_x86.ngeneral) - 1))
-- Enable all fixed-function counters (IA32_FIXED_CTR_CTRL)
writemsr(cpu, 0x38d, 0x333)
local used
enabled, used = pmu_x86.init_events(cpu, #set)
for n = 0, #set-1 do
local code = defs[set[n+1]]
local USR = bit.lshift(1, 16)
local EN = bit.lshift(1, 22)
writemsr(cpu, 0x186+n, bit.bor(0x10000, USR, EN, code))
local code = defs[set[n+1]] or tonumber(set[n+1])
pmu_x86.enable_event(cpu, n+used, code)
end
enabled = {"instructions", "cycles", "ref_cycles"}
for i = 1, #set do table.insert(enabled, set[i]) end
return ndropped
end

function writemsr (cpu, msr, value)
local msrfile = ("/dev/cpu/%d/msr"):format(cpu)
if not S.stat(msrfile) then
error("Cannot open "..msrfile.." (consider 'modprobe msr')")
end
local fd = assert(S.open(msrfile, "rdwr"))
assert(fd:lseek(msr, "set"))
assert(fd:write(ffi.new("uint64_t[1]", value), 8))
fd:close()
end

-- API function (see above)
function report (tab, aux)
aux = aux or {}
local data = {}
for k,v in pairs(tab) do table.insert(data, {k=k,v=v}) end
-- Sort fixed-purpose counters to come first in definite order
local fixed = {cycles='0', ref_cycles='1', instructions='2'}
local fixed = {cycles='0000', ref_cycles='0001', instructions='0002'}
table.sort(data, function(x,y)
return (fixed[x.k] or x.k) < (fixed[y.k] or y.k)
end)
@@ -239,7 +222,7 @@ function selftest ()
print('', k, v)
n = n + 1
end
assert(n == 3)
assert(n >= 2)
print("selftest ok")
end

121 changes: 105 additions & 16 deletions src/lib/pmu_x86.dasl
@@ -16,7 +16,7 @@ local debug = false

local lib = require("core.lib")
local ffi = require("ffi")
local C = ffi.C
local S = require("syscall")

local dasm = require("dasm")

@@ -85,6 +85,7 @@ name.reg.ebx, name.reg.ecx, name.reg.edx = id.ebx, id.ecx, id.edx
local vendor = ffi.string(name.string, 12)
cpuid(0x1, id)
local family = bit.band(bit.rshift(id.eax, 8), 0xf)
local extfamily = bit.band(bit.rshift(id.eax, 20), 0xff)
local model = bit.band(bit.rshift(id.eax, 4), 0xf)
local extmodel = bit.band(bit.rshift(id.eax, 16), 0xf)

@@ -93,13 +94,28 @@ local extmodel = bit.band(bit.rshift(id.eax, 16), 0xf)
-- (Could alternatively grovel this from /proc/cpuinfo.)
cpu_model = ("%s-%X-%X%X"):format(vendor, family, extmodel, model)

-- Calculate nfixed, ngeneral, ncounters: number of CPU performance
-- counters for the running CPU.
local id = ffi.new(cpuid_t)
cpuid(0xa, id)
nfixed = bit.band(id.edx, 0x1f)
ngeneral = bit.band(bit.rshift(id.eax, 8), 0xff)
ncounters = nfixed + ngeneral
-- PMC control register base and step.
local pmc_ctl_base, pmc_ctl_step

if vendor == "GenuineIntel" then
pmc_ctl_base, pmc_ctl_step = 0x186, 1
-- Calculate nfixed, ngeneral, ncounters: number of CPU performance
-- counters for the running CPU.
local id = ffi.new(cpuid_t)
cpuid(0xa, id)
nfixed = bit.band(id.edx, 0x1f)
ngeneral = bit.band(bit.rshift(id.eax, 8), 0xff)
elseif vendor == "AuthenticAMD" then
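-- (On AMD the effective family is the sum of the base and extended family
-- fields: family 17h CPUs report base family 0xF and extfamily 0x8.)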
if family+extfamily >= 0x15 then
pmc_ctl_base, pmc_ctl_step = 0xc0010200, 2
nfixed = 0
ngeneral = 4
end
if family+extfamily >= 0x17 then
ngeneral = 6
end
end
ncounters = (nfixed or 0) + (ngeneral or 0)

-- rdpmc_multi(uint64_t[nfixed+ngeneral] *dst)
--
@@ -147,22 +163,95 @@ function enable_rdpmc ()
end
end

-- Enable MSR
function enable_msr ()
if not S.stat("/dev/cpu/0/msr") then
print("[pmu: modprobe msr]")
os.execute("modprobe msr")
if not S.stat("/dev/cpu/0/msr") then
return false, "requires /dev/cpu/*/msr (Linux 'msr' module)"
end
end
return true
end

local function writemsr (cpu, msr, value)
local msrfile = ("/dev/cpu/%d/msr"):format(cpu)
if not S.stat(msrfile) then
error("Cannot open "..msrfile.." (consider 'modprobe msr')")
end
local fd = assert(S.open(msrfile, "rdwr"))
assert(fd:lseek(msr, "set"))
assert(fd:write(ffi.new("uint64_t[1]", value), 8))
fd:close()
end
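
-- Hypothetical counterpart (not part of this commit): reading an MSR back via
-- the same /dev/cpu/N/msr interface, e.g. to inspect a PERF_CTL register.
-- Sketch only; assumes the ljsyscall calls already used by writemsr above.
local function readmsr (cpu, msr)
   local msrfile = ("/dev/cpu/%d/msr"):format(cpu)
   local fd = assert(S.open(msrfile, "rdonly"))
   assert(fd:lseek(msr, "set"))
   local value = ffi.new("uint64_t[1]")
   assert(fd:read(value, 8))
   fd:close()
   return value[0]
end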

-- Platform specific MSR functions:
--
-- init_events(cpu, nevents) -> default events enabled, general counters claimed
--    Initializes the PMU and enables any fixed-function or default counters.
--
-- enable_event(cpu, index, code)
--    Sets up the counter at index to count the event(s) selected by code.

if vendor == "GenuineIntel" then
function init_events (cpu)
-- All available counters are globally enabled
-- (IA32_PERF_GLOBAL_CTRL).
writemsr(cpu, 0x38f, bit.bor(bit.lshift(0x3ULL, 32),
bit.lshift(1ULL, pmu_x86.ngeneral) - 1))
-- Enable all fixed-function counters (IA32_FIXED_CTR_CTRL)
writemsr(cpu, 0x38d, 0x333)
return {"instructions", "cycles", "ref_cycles"}, 0
end

elseif vendor == "AuthenticAMD" and family+extfamily >= 0x15 then
function init_events (cpu, nselected)
-- No setup needed and no fixed-function counters. To keep API portability we
-- enable some events by default if there is room.
local default = {}
for event, code in pairs({instructions=0x00c0, cycles=0x0076}) do
if nselected < ngeneral then
enable_event(cpu, #default, code)
default[#default+1] = event
nselected = nselected + 1
end
end
return default, #default
end
end

function enable_event (cpu, index, code)
local USR = bit.lshift(1, 16)
local EN = bit.lshift(1, 22)
-- AMD BKDG says: to accurately start counting with the write that enables
-- the counter, disable the counter when changing the event and then enable
-- the counter with a second MSR write.
writemsr(cpu, pmc_ctl_base+index*pmc_ctl_step, 0)
writemsr(cpu, pmc_ctl_base+index*pmc_ctl_step, bit.bor(USR, EN, code))
end
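-- For example, enabling raw event code 0x00c0 on counter index 1 writes
-- bit.bor(USR, EN, 0x00c0) = 0x4100c0 to MSR 0x187 on Intel (0x186 + 1*1)
-- or to MSR 0xc0010202 on AMD family 17h (0xc0010200 + 1*2).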

function selftest ()
print("selftest: pmu_x86")
enable_rdpmc()
-- Expected values for Sandy Bridge - Skylake
print("nfixed", nfixed, "ngeneral", ngeneral)
assert(nfixed == 3, "nfixed: " .. nfixed)
assert(ngeneral == 4 or ngeneral == 8, "ngeneral: " .. ngeneral)
local buf = ffi.new("uint64_t[?]", nfixed + ngeneral + 1)
if vendor == "GenuineIntel" then
-- Expected values for Sandy Bridge - Skylake
assert(nfixed == 3, "nfixed: " .. nfixed)
assert(ngeneral == 4 or ngeneral == 8, "ngeneral: " .. ngeneral)
elseif vendor == "AuthenticAMD" and family+extfamily >= 0x15 then
assert(nfixed == 0, "nfixed: " .. nfixed)
assert(ngeneral >= 4, "ngeneral: " .. ngeneral)
end
local buf = ffi.new("uint64_t[?]", ncounters + 1)
local magic = 0x0001020304050607ULL
-- Store magic number in all fields (including extra sentinel)
for i = 0, nfixed+ngeneral do buf[i] = magic end
for i = 0, ncounters do buf[i] = magic end
rdpmc_multi(buf)
for i = 0, 9-1 do print("buf["..i.."]", tonumber(buf[i])) end
for i = 0, ncounters do print("buf["..i.."]", tonumber(buf[i])) end
-- Check that all values are written
for i = 0, nfixed+ngeneral-1 do assert(buf[i] ~= magic, "overwrite") end
assert(buf[nfixed+ngeneral] == magic, "sentinel")
for i = 0, ncounters-1 do assert(buf[i] ~= magic, "overwrite") end
assert(buf[ncounters] == magic, "sentinel")
print("selftest: ok")
end
