Skip to content

Commit

Permalink
WIP: Implement function multi versioning in sysimg
Browse files Browse the repository at this point in the history
  • Loading branch information
yuyichao committed May 13, 2017
1 parent f28f57c commit 0aefb4e
Show file tree
Hide file tree
Showing 6 changed files with 344 additions and 2 deletions.
18 changes: 18 additions & 0 deletions base/sysimg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,24 @@ end
INCLUDE_STATE = 3 # include = include_from_node1
include("precompile.jl")

# Multi-versioning test helper: adds up every element of `a` with an
# `@inbounds @simd` loop, starting from `zero(eltype(a))`.
# Marked `@noinline` so it stays a separate function.
@noinline function test_clone_f(a)
    total = zero(eltype(a))
    @inbounds @simd for idx in 1:length(a)
        total += a[idx]
    end
    total
end

# Multi-versioning test helper: calls `test_clone_f(a)` `n` times and
# accumulates the results, starting from `zero(eltype(a))`.
# Marked `@noinline` so the call to `test_clone_f` stays a real call.
@noinline function test_clone_g(a, n)
    acc = zero(eltype(a))
    for _ in 1:n
        acc += test_clone_f(a)
    end
    acc
end

# Run the test helpers once on an empty Float64 vector while the system image
# is being built (presumably so that both functions get compiled into the
# sysimg for the multi-versioning pass to see -- TODO confirm).
test_clone_g(Float64[], 1)

end # baremodule Base

using Base
Expand Down
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ endif
LLVMLINK :=

ifeq ($(JULIACODEGEN),LLVM)
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-gcroot cgmemmgr
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-gcroot llvm-mv cgmemmgr
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
LLVM_LIBS := all
ifeq ($(USE_POLLY),1)
Expand Down
27 changes: 27 additions & 0 deletions src/dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,11 @@ JL_DLLEXPORT int jl_running_on_valgrind(void)
return RUNNING_ON_VALGRIND;
}

// Pack two 32-bit words into one 64-bit value: `hi` lands in bits 63..32 and
// `lo` in bits 31..0.
//
// Only the low 32 bits of each argument are used. This matters because the
// caller passes `int32_t` CPUID registers: a register with bit 31 set is
// sign-extended when converted to `uint64_t`, and without the mask those
// extension bits would be OR-ed over the whole `hi` half.
STATIC_INLINE uint64_t i32_to_i64(uint64_t hi, uint64_t lo)
{
    return (hi << 32) | (lo & UINT64_C(0xffffffff));
}

static void jl_load_sysimg_so(void)
{
#ifndef _OS_WINDOWS_
Expand All @@ -242,6 +247,28 @@ static void jl_load_sysimg_so(void)
*sysimg_gvars[tls_offset_idx - 1] =
(jl_value_t*)(uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset);
#endif
typedef void (*dispatch_t)(uint64_t, uint64_t, uint64_t, size_t*, void***, size_t**);
dispatch_t dispatchf = (dispatch_t)jl_dlsym(jl_sysimg_handle,
"jl_dispatch_sysimg_fvars");
if (dispatchf) {
int32_t info[4];
jl_cpuid(info, 1);
int32_t infoex[4];
jl_cpuidex(infoex, 7, 0);
uint64_t mask = i32_to_i64(info[3], info[2]);
uint64_t emask1 = i32_to_i64(infoex[1], infoex[2]);
uint64_t emask2 = i32_to_i64(infoex[3], 0);
size_t nfunc = 0;
void **fptrs = NULL;
size_t *fidxs = NULL;
dispatchf(mask, emask1, emask2, &nfunc, &fptrs, &fidxs);
if (nfunc && fptrs && fidxs) {
for (size_t i = 0; i < nfunc; i++) {
size_t fi = fidxs[i];
sysimg_fvars[fi] = fptrs[i];
}
}
}
const char *cpu_target = (const char*)jl_dlsym(jl_sysimg_handle, "jl_sysimg_cpu_target");
if (strcmp(cpu_target,jl_options.cpu_target) != 0)
jl_error("Julia and the system image were compiled for different architectures.\n"
Expand Down
3 changes: 2 additions & 1 deletion src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ void addOptimizationPasses(PassManager *PM)
// Let the InstCombine pass remove the unnecessary load of
// safepoint address first
PM->add(createLowerPTLSPass(imaging_mode));
PM->add(createJuliaMVPass());
PM->add(createSROAPass()); // Break up aggregate allocas
#ifndef INSTCOMBINE_BUG
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
Expand Down Expand Up @@ -1088,7 +1089,7 @@ static void jl_gen_llvm_globaldata(llvm::Module *mod, ValueToValueMapTy &VMap,
ArrayType *fvars_type = ArrayType::get(T_pvoidfunc, jl_sysimg_fvars.size());
addComdat(new GlobalVariable(*mod,
fvars_type,
true,
false,
GlobalVariable::ExternalLinkage,
MapValue(ConstantArray::get(fvars_type, ArrayRef<Constant*>(jl_sysimg_fvars)), VMap),
"jl_sysimg_fvars"));
Expand Down
1 change: 1 addition & 0 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ JL_DLLEXPORT extern LLVMContext &jl_LLVMContext;

Pass *createLowerPTLSPass(bool imaging_mode);
Pass *createLowerGCFramePass();
Pass *createJuliaMVPass();
// Whether the Function is an llvm or julia intrinsic.
static inline bool isIntrinsicFunction(Function *F)
{
Expand Down
295 changes: 295 additions & 0 deletions src/llvm-mv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,295 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

// Function multi-versioning
#define DEBUG_TYPE "julia_mv"
#undef DEBUG

// LLVM pass to clone function for different archs

#include "llvm-version.h"
#include "support/dtypes.h"

#include <llvm/Pass.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/Analysis/LoopInfo.h>
#if JL_LLVM_VERSION >= 30700
#include <llvm/IR/LegacyPassManager.h>
#else
#include <llvm/PassManager.h>
#endif
#include <llvm/IR/MDBuilder.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/Transforms/Utils/Cloning.h>
#include "fix_llvm_assert.h"

#include "julia.h"
#include "julia_internal.h"

#include <unordered_map>
#include <vector>

using namespace llvm;

extern std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false);
extern "C" void jl_dump_llvm_value(void *v);

namespace {

// Module pass implementing function multi-versioning for the system image:
// it clones selected functions referenced from `jl_sysimg_fvars` and emits a
// runtime dispatch function (see `runOnModule`).
struct JuliaMV: public ModulePass {
    static char ID;
    JuliaMV()
        : ModulePass(ID)
    {}

private:
    // Entry point invoked by the pass manager for each module.
    bool runOnModule(Module &M) override;
    // Loop information is needed by `shouldClone`. Nothing is declared as
    // preserved: this pass adds functions and rewrites instructions, so
    // previously computed analyses must not be assumed to still be valid.
    void getAnalysisUsage(AnalysisUsage &AU) const override
    {
        AU.addRequired<LoopInfoWrapperPass>();
    }
    // Heuristic: does `F` contain code that is worth cloning?
    bool shouldClone(Function &F);
    // Is `F` referenced from `fary` and otherwise only used by instructions
    // (possibly through constant bitcasts)?
    bool checkUses(Function &F, Constant *fary);
    // Recursive worker for the overload above; walks the users of `V`.
    bool checkUses(Function &F, Constant *V, Constant *fary, bool &inFVars);
    // Checks a single constant user `V` reached while walking the uses of `F`.
    bool checkConstantUse(Function &F, Constant *V, Constant *fary, bool &inFVars);
};

// Returns true when `F` has a body and either contains at least one loop or
// directly calls a function whose name starts with `llvm.muladd.` or
// `llvm.fma.`.
bool JuliaMV::shouldClone(Function &F)
{
    // A function without basic blocks is only a declaration.
    if (F.empty())
        return false;
    // Any loop at all makes the function a candidate.
    if (!getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo().empty())
        return true;
    // Otherwise look for direct calls to the muladd/fma intrinsics.
    for (auto &block: F) {
        for (auto &inst: block) {
            auto call = dyn_cast<CallInst>(&inst);
            if (!call)
                continue;
            auto callee = call->getCalledFunction();
            if (!callee)
                continue;
            auto calleeName = callee->getName();
            if (calleeName.startswith("llvm.muladd.") || calleeName.startswith("llvm.fma."))
                return true;
        }
    }
    return false;
}

// Returns true only if every use of `F` is acceptable to the recursive
// overload AND at least one of those uses is the `fary` constant itself.
bool JuliaMV::checkUses(Function &F, Constant *fary)
{
    bool foundInFVars = false;
    if (!checkUses(F, &F, fary, foundInFVars))
        return false;
    return foundInFVars;
}

// Validates one constant user `V` found while walking the uses of `F`.
// - If `V` is `fary`, records that in `inFVars` and accepts it.
// - If `V` is a constant bitcast expression, its own users are checked.
// - Any other constant is rejected.
bool JuliaMV::checkConstantUse(Function &F, Constant *V, Constant *fary, bool &inFVars)
{
    if (V == fary) {
        inFVars = true;
        return true;
    }
    auto cexpr = dyn_cast<ConstantExpr>(V);
    const bool isBitCast = cexpr && cexpr->getOpcode() == Instruction::BitCast;
    // Look through the bitcast and validate whatever uses it.
    return isBitCast && checkUses(F, V, fary, inFVars);
}

// Walks every user of `V`. Instruction users are always accepted; constant
// users must pass `checkConstantUse`; any other kind of user makes the
// whole check fail. `inFVars` is set (by `checkConstantUse`) when `fary` is
// encountered.
bool JuliaMV::checkUses(Function &F, Constant *V, Constant *fary, bool &inFVars)
{
    for (auto *U: V->users()) {
        if (isa<Instruction>(U))
            continue;
        if (auto *constUser = dyn_cast<Constant>(U)) {
            if (checkConstantUse(F, constUser, fary, inFVars))
                continue;
        }
        return false;
    }
    return true;
}

// Strips any number of constant bitcast expressions from `v` and returns the
// `Function` underneath, or `nullptr` if something other than a function
// (or a bitcast chain ending in one) is found.
static Function *getFunction(Value *v)
{
    for (;;) {
        if (auto f = dyn_cast<Function>(v))
            return f;
        auto c = dyn_cast<ConstantExpr>(v);
        if (!c || c->getOpcode() != Instruction::BitCast)
            return nullptr;
        // Peel one bitcast layer and look again.
        v = c->getOperand(0);
    }
}

// Sets the "target-features" attribute of `F` to a fixed AVX2-level feature
// list. If `F` already carries a string-valued "target-features" attribute,
// its value is appended after the fixed list, separated by a comma.
static void addFeatures(Function *F)
{
    std::string feature("+avx2,+avx,+fma,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3");
    const auto existing = F->getFnAttribute("target-features");
    if (existing.isStringAttribute()) {
        feature += ',';
        feature += existing.getValueAsString();
    }
    F->addFnAttr("target-features", feature);
}

// Function multi-versioning driver.
//
// When the module defines `jl_sysimg_fvars` with a constant-array initializer
// this pass:
//   1. creates an `<name>.avx2` clone of every function accepted by
//      `shouldClone` and `checkUses`, with extra target features added;
//   2. rewrites uses of the original function from instructions in functions
//      that were *not* cloned so that they load the function pointer from
//      the matching `jl_sysimg_fvars` slot instead of naming it directly;
//   3. emits two internal tables (clone pointers and their slot indices) and
//      the exported `jl_dispatch_sysimg_fvars` function, which compares the
//      caller-supplied CPU feature masks against hard-coded requirements and
//      hands back either the tables or null pointers.
//
// Returns true if the module was modified, false otherwise.
bool JuliaMV::runOnModule(Module &M)
{
    GlobalVariable *fvars = M.getGlobalVariable("jl_sysimg_fvars");
    // This makes sure this only runs during sysimg generation.
    // Nothing has been touched on these early exits, so report "unmodified".
    if (!fvars || !fvars->hasInitializer())
        return false;
    auto *fary = dyn_cast<ConstantArray>(fvars->getInitializer());
    if (!fary)
        return false;
    // Only create the metadata once we know there is work to do.
    MDNode *tbaa_const = tbaa_make_child("jtbaa_const", nullptr, true).first;
    LLVMContext &ctx = M.getContext();
    ValueToValueMapTy VMap;
    // Declare an (empty for now) clone for every eligible function. The new
    // declarations are appended to the module and visited by this loop too,
    // but `shouldClone` rejects them because they have no body yet.
    for (auto &F: M) {
        if (shouldClone(F) && checkUses(F, fary)) {
            Function *NF = Function::Create(cast<FunctionType>(F.getValueType()),
                                            F.getLinkage(), F.getName() + ".avx2", &M);
            NF->copyAttributesFrom(&F);
            VMap[&F] = NF;
        }
    }
    // Map each cloned function to its slot index in `jl_sysimg_fvars`.
    std::unordered_map<Function*,size_t> idx_map;
    size_t nf = fary->getNumOperands();
    for (size_t i = 0; i < nf; i++) {
        if (Function *ele = getFunction(fary->getOperand(i))) {
            auto it = VMap.find(ele);
            if (it != VMap.end()) {
                idx_map[ele] = i;
            }
        }
    }
    // Fill in the bodies of the clones. Because `VMap` already maps every
    // original to its clone, references between cloned functions are
    // redirected to the clones.
    for (auto I: idx_map) {
        auto oldF = I.first;
        auto newF = cast<Function>(VMap[oldF]);
        Function::arg_iterator DestI = newF->arg_begin();
        for (Function::const_arg_iterator J = oldF->arg_begin(); J != oldF->arg_end(); ++J) {
            DestI->setName(J->getName());
            VMap[&*J] = &*DestI++;
        }
        SmallVector<ReturnInst*,8> Returns;
        CloneFunctionInto(newF, oldF, VMap, false, Returns);
        addFeatures(newF);
    }
    std::vector<Constant*> ptrs;
    std::vector<Constant*> idxs;
    auto T_void = Type::getVoidTy(ctx);
    auto T_pvoidfunc = FunctionType::get(T_void, false)->getPointerTo();
    auto T_size = (sizeof(size_t) == 8 ? Type::getInt64Ty(ctx) : Type::getInt32Ty(ctx));
    for (auto I: idx_map) {
        auto oldF = I.first;
        auto idx = I.second;
        auto newF = cast<Function>(VMap[oldF]);
        ptrs.push_back(ConstantExpr::getBitCast(newF, T_pvoidfunc));
        auto offset = ConstantInt::get(T_size, idx);
        idxs.push_back(offset);
        // Snapshot the instruction users first: rewriting operands below
        // edits `oldF`'s use list, which must not happen while iterating it.
        std::vector<Instruction*> insts;
        for (auto user: oldF->users()) {
            if (auto inst = dyn_cast<Instruction>(user))
                insts.push_back(inst);
        }
        for (auto inst: insts) {
            auto encloseF = inst->getParent()->getParent();
            // Uses inside functions that were themselves cloned are left alone.
            if (VMap.find(encloseF) != VMap.end())
                continue;
            // An instruction using `oldF` in several operands is listed once
            // per use; all of them were already replaced on the first visit.
            bool usesOld = false;
            for (auto &op: inst->operands()) {
                if (op.get() == oldF) {
                    usesOld = true;
                    break;
                }
            }
            if (!usesOld)
                continue;
            // `fvars` points to the whole array, so the first index (0) steps
            // through that pointer and the second selects the element. A
            // single index would step by whole arrays instead.
            Value *gep_idx[] = {ConstantInt::get(T_size, 0), offset};
            auto slot = GetElementPtrInst::Create(fary->getType(), fvars, gep_idx, "", inst);
            Instruction *ptr = new LoadInst(slot, "", inst);
            ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
            ptr = new BitCastInst(ptr, oldF->getType(), "", inst);
            inst->replaceUsesOfWith(oldF, ptr);
        }
    }
    // Parallel tables: `ptrs[i]` is the clone that belongs in slot `idxs[i]`.
    ArrayType *fvars_type = ArrayType::get(T_pvoidfunc, ptrs.size());
    auto ptr_gv = new GlobalVariable(M, fvars_type, true, GlobalVariable::InternalLinkage,
                                     ConstantArray::get(fvars_type, ptrs));
    ArrayType *idxs_type = ArrayType::get(T_size, idxs.size());
    auto idx_gv = new GlobalVariable(M, idxs_type, true, GlobalVariable::InternalLinkage,
                                     ConstantArray::get(idxs_type, idxs));

    std::vector<Type*> dispatch_args(0);
    dispatch_args.push_back(Type::getInt64Ty(ctx)); // Feature mask
    dispatch_args.push_back(Type::getInt64Ty(ctx)); // Extended feature mask1
    dispatch_args.push_back(Type::getInt64Ty(ctx)); // Extended feature mask2
    dispatch_args.push_back(T_size->getPointerTo()); // out: number of entries
    dispatch_args.push_back(fvars_type->getPointerTo()->getPointerTo()); // out: clone table
    dispatch_args.push_back(idxs_type->getPointerTo()->getPointerTo()); // out: index table
    Function *dispatchF = Function::Create(FunctionType::get(T_void, dispatch_args, false),
                                           Function::ExternalLinkage,
                                           "jl_dispatch_sysimg_fvars", &M);
    IRBuilder<> builder(ctx);
    BasicBlock *b0 = BasicBlock::Create(ctx, "top", dispatchF);
    builder.SetInsertPoint(b0);
    DebugLoc noDbg;
    builder.SetCurrentDebugLocation(noDbg);

    std::vector<Argument*> args;
    for (auto &arg: dispatchF->args())
        args.push_back(&arg);

    auto sz_arg = args[3];
    auto fvars_arg = args[4];
    auto idxs_arg = args[5];

    // Hard code for now
    // EDX:ECX of CPUID leaf 1. The bits below are all in the ECX half:
    // SSE3 (0), SSSE3 (9), FMA (12), SSE4.1 (19), SSE4.2 (20), POPCNT (23),
    // AVX (28) -- per the Intel CPUID feature tables.
    uint64_t mask = 1 | (1 << 9) | (1 << 12) | (1 << 19) | (1 << 20) | (1 << 23) | (1 << 28);
    // EBX:ECX of CPUID leaf 7, subleaf 0: EBX bit 5 is AVX2.
    uint64_t emask1 = uint64_t(1) << (5 + 32);
    // EDX:0
    uint64_t emask2 = 0;

    // The entry count is reported regardless of whether the masks match.
    builder.CreateStore(ConstantInt::get(T_size, ptrs.size()), sz_arg);

    // (v & mask) == mask, i.e. every required bit is present in `v`.
    auto createMaskCmp = [&] (Value *v, uint64_t mask) {
        auto maskv = ConstantInt::get(v->getType(), mask);
        return builder.CreateICmpEQ(builder.CreateAnd(v, maskv), maskv);
    };

    auto match_mask = createMaskCmp(args[0], mask);
    auto match_emask1 = createMaskCmp(args[1], emask1);
    auto match_emask2 = createMaskCmp(args[2], emask2);

    auto match = builder.CreateAnd(match_mask, match_emask1);
    match = builder.CreateAnd(match, match_emask2);

    BasicBlock *match_bb = BasicBlock::Create(ctx, "match");
    BasicBlock *fail_bb = BasicBlock::Create(ctx, "fail");
    builder.CreateCondBr(match, match_bb, fail_bb);

    // All required features present: hand out the clone and index tables.
    dispatchF->getBasicBlockList().push_back(match_bb);
    builder.SetInsertPoint(match_bb);
    builder.CreateStore(ptr_gv, fvars_arg);
    builder.CreateStore(idx_gv, idxs_arg);
    builder.CreateRetVoid();

    // Some feature is missing: return null tables so the caller does nothing.
    dispatchF->getBasicBlockList().push_back(fail_bb);
    builder.SetInsertPoint(fail_bb);
    builder.CreateStore(ConstantPointerNull::get(fvars_type->getPointerTo()), fvars_arg);
    builder.CreateStore(ConstantPointerNull::get(idxs_type->getPointerTo()), idxs_arg);
    builder.CreateRetVoid();

    // jl_dump_llvm_value(dispatchF);
    // jl_dump_llvm_value(ptr_gv);
    // jl_dump_llvm_value(idx_gv);

    return true;
}

// Storage for the pass identifier handed to the `ModulePass` constructor.
char JuliaMV::ID = 0;
// Static registration of the pass under the name "JuliaMV".
static RegisterPass<JuliaMV> X(
    "JuliaMV",
    "JuliaMV Pass",
    false, // Only looks at CFG
    false  // Analysis Pass
);

}

// Factory for the multi-versioning pass, defined outside the anonymous
// namespace so other translation units can link against it.
// Returns a newly heap-allocated `JuliaMV` instance as a `Pass*`.
Pass *createJuliaMVPass()
{
return new JuliaMV();
}

0 comments on commit 0aefb4e

Please sign in to comment.