è
–
è
–
$ docker build -f docker/Dockerfile.tmpl -t chainer-compiler:cuda-10.0 .
$ nvidia-docker run -i -t --cap-add=SYS_PTRACE --security-opt="seccomp=unconfined" \
chainer-compiler:cuda-10.0 /bin/zsh
è
=================================== FAILURES ===================================
____________________________ TestPReLU.test_output _____________________________
self = <tests.functions_tests.test_activations.TestPReLU testMethod=test_output>
def test_output(self):
> self.expect(self.model, self.x)
third_party/onnx-chainer/tests/functions_tests/test_activations.py:61:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
third_party/onnx-chainer/tests/helper.py:106: in expect
self.check_out_values(test_path, input_names=graph_input_names)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
test_path = 'out/opset7/test_prelu', input_names = ['Input_0']
def check_model_expect(test_path, input_names=None):
if not ONNXRUNTIME_AVAILABLE:
> raise ImportError('ONNX Runtime is not found on checking module.')
E ImportError: ONNX Runtime is not found on checking module.
third_party/onnx-chainer/onnx_chainer/testing/test_onnxruntime.py:39: ImportError
è
–
–
–
è
è
–
–
$ ./setup.sh
$ ./build/tools/run_onnx --device cuda --test data/resnet50 --trace
$ ./build/tools/run_onnx --device cuda --test data/mnist --trace
è
è
–
–
Check `chainerx::Shape(type.shape().begin(), type.shape().end())' == `a.shape()' failed! in CheckType
at ../runtime/xcvm.cc:57: ((1, 8, 28, 28) vs (1, 8, 24, 24)) Shape check failed in output #0: Conv(Input3,
Parameter5) -> (Convolution28_Output_0)
zsh: abort ./build/tools/run_onnx --device cuda --test data/mnist --trace
name: "Convolution28"
op_type: "Conv"
Attribute {
name: "auto_pad"
s: "SAME_UPPER"
type: STRING
}
auto pads = [&node]() {
std::vector<int64_t> pads = node.pads();
+
+ // Complement from auto_pad
+ if (pads.size() == 0) {
+ if (node.auto_pad() == "SAME_UPPER") {
+ const Value* weight = node.input(1);
+ const int pad_ndim = (weight->type().ndim() - 2)*2;
+ CHECK_GT(pad_ndim, 0) << weight->type().DebugString();
+ pads.resize(pad_ndim);
+ for (int i = 0; i < pad_ndim/2; ++i) {
+ pads[i] = pads[i+pad_ndim/2] = weight->type().dims()[i+2] / 2;
+ }
+ }
+ }
compiler/emitter.cc:167
Verifying the result...
OK: Plus214_Output_0
Elapsed: 23.395 msec
OK!
// Entry point of the run_onnx tool: loads an ONNX model, compiles it to an
// XCVM program, then executes every bundled test case on the XCVM.
// NOTE(review): slide excerpt — elided code is marked with /* ... */.
void RunMain(const std::vector<std::string>& argv) {
LOG() << "Loading model..." << std::endl;
onnx::ModelProto xmodel(
LoadLargeProto<onnx::ModelProto>(onnx_path)); // Load the ONNX model
Model model(xmodel); // Build a Model from the onnx::ModelProto
/* ... */
ModelRunner model_runner(args, initial_free_bytes, &model); // Run compiler passes over the Model and convert it to XCVM
/* ... */
for (const std::unique_ptr<TestCase>& test_case : test_cases) {
LOG() << "Running for " << test_case->name << std::endl;
InOuts inputs(model_runner.params());
InOuts outputs(model_runner.Run(inputs)); // Execute the model on the XCVM
}
/* ... */
}
tools/run_onnx.cc
è
// Compiles the given Model into an XCVM program at construction time.
// In the default (non "backprop_two_phase") mode it applies the default
// compiler passes and then lowers the graph to XCVM.
class ModelRunner {
public:
ModelRunner(const cmdline::parser& args, int64_t initial_free_bytes, Model* model)
: model_(model), args_(args), initial_free_bytes_(initial_free_bytes) {
if (args.exist("backprop_two_phase")) {
/* ... */
} else {
LOG() << "Constructing model..." << std::endl;
RunDefaultPasses(model->mutable_graph(), args_.exist("backprop")); // Apply the compiler passes (lowering)
CompileModel(model, &xcvm_); // Convert to XCVM
}
/* ... */
}
tools/run_onnx.cc
è
// Converts a Model into an XCVM program, optionally dumping the intermediate
// ONNX model (--out_onnx) and the serialized XCVM program (--out_xcvm).
void CompileModel(Model* model, std::unique_ptr<XCVM>* xcvm, const char* name = nullptr, bool gen_backprop = false) {
std::string out_onnx = args_.get<std::string>("out_onnx");
if (!out_onnx.empty()) {
onnx::ModelProto xmodel;
model->ToONNX(&xmodel);
std::ofstream ofs(out_onnx);
CHECK(xmodel.SerializeToOstream(&ofs)); // Serialize the ONNX model
}
LOG() << "Generate code..." << std::endl;
XCProgramProto xcvm_prog;
xcvm::Emit(*model, &xcvm_prog, trace_level() > 0); // Convert the Model into an XCProgramProto
const std::string out_xcvm = args_.get<std::string>("out_xcvm");
if (!out_xcvm.empty()) {
std::ofstream ofs(out_xcvm);
CHECK(ofs) << "Failed to open output XCVM: " << out_xcvm;
CHECK(xcvm_prog.SerializeToOstream(&ofs)); // Serialize the XCVM program
}
xcvm->reset(new XCVM(xcvm_prog)); // Wrap the XCProgramProto in an XCVM
}
tools/run_onnx.cc
è
–
–
è
è
–
// In-memory counterpart of onnx::ModelProto: model-level metadata plus the
// main computation graph (owned via unique_ptr).
class Model {
public:
explicit Model(const onnx::ModelProto& xmodel);
private:
int64_t ir_version_; // ONNX IR version of the source model
std::vector<onnx::OperatorSetIdProto> opset_import_; // opsets the model relies on
std::string producer_name_;
std::string producer_version_;
std::string domain_;
int64_t model_version_;
std::string doc_string_;
std::map<std::string, std::string> metadata_props_;
std::unique_ptr<Graph> graph_; // the main graph; owned by the Model
};
compiler/model.h
compiler/model.h
è
–
// In-memory counterpart of onnx::GraphProto. Values and Nodes are owned by
// the *_buf_/all_values_ vectors of unique_ptr; the raw-pointer vectors are
// non-owning views grouped by role.
class Graph {
public:
explicit Graph(const onnx::GraphProto& xgraph);
private:
std::vector<Value*> output_values_; // graph outputs (non-owning)
std::vector<Value*> input_values_; // graph inputs (non-owning)
std::vector<Value*> temp_values_; // intermediate values (non-owning)
std::vector<std::unique_ptr<Value>> all_values_; // owns every Value
std::vector<Node*> nodes_; // live nodes (non-owning)
std::vector<std::unique_ptr<Node>> nodes_buf_; // owns every Node
std::string name_;
std::string doc_string_;
std::map<std::string, int> ids_; // presumably name -> counter for unique IDs — TODO confirm
};
compiler/graph.h
compiler/graph.h
è
–
// A graph operator. Attribute storage lives in the generated base class
// NodeBase; inputs/outputs are non-owning pointers to Values owned by Graph.
class Node : public NodeBase {
public:
// Construct from an ONNX NodeProto (deserialization path).
Node(const onnx::NodeProto& xnode, const std::vector<Value*>& inputs, const
std::vector<Value*>& outputs);
// Construct programmatically (e.g. from GraphBuilder).
Node(const std::string& name, OpType op_type, const std::vector<Value*>& inputs, const
std::vector<Value*>& outputs);
private:
std::vector<Value*> inputs_;
std::vector<Value*> outputs_;
std::string name_;
std::string domain_;
std::string doc_string_;
bool detached_ = false; // set when the node is removed from the live graph
};
compiler/node.h
compiler/node.h
è
–
// Generated base class (see compiler/gen_node.py): holds the OpType enum for
// every supported operator plus a field per ONNX attribute.
class NodeBase {
public:
enum OpType {
kIdentity,
kNeg,
...
};
protected:
std::vector<float> activation_alpha_;
// presumably records whether the attribute was explicitly set — confirm in gen_node.py
bool was_activation_alpha_set_ = false;
std::vector<float> activation_beta_;
...
};
build/compiler/gen_node_base.h
build/compiler/gen_node_base.h
# Declarative description of one operator: arity plus attribute defaults.
# Instantiating a NodeDef registers it in the global NODES list, which the
# generator walks to emit gen_node_base.h.
class NodeDef(object):
def __init__(self, op_type, num_inputs, num_outputs, **kwargs):
self.op_type = op_type
self.num_inputs = num_inputs
self.num_outputs = num_outputs
# kwargs become the operator's attributes, merged with globally shared ones.
self.attributes = kwargs
self.attributes.update(CHAINER_COMPILERX_GLOBAL_ATTRS)
self.attr_defs = {} # To be filled after parsed.
NODES.append(self)
NodeDef('Identity', 1, 1)
NodeDef('Neg', 1, 1)
…
def gen_gen_node_base_h():
public_lines = []
private_lines = []
public_lines.append('enum OpType {')
for node in NODES:
public_lines.append('k%s,' % (node.op_type))
…
compiler/gen_node.py
è
–
// A graph edge (tensor). Tracks its producer node, its consumer nodes, an
// optional initializer tensor, and (when training) its gradient value.
class Value {
public:
enum Kind { kTemp = 0, kInput = 1, kOutput = 2, kNull = 4 };
Value(const onnx::ValueInfoProto& xvalue, Kind kind);
private:
Kind kind_{Kind::kTemp};
std::string name_;
std::unique_ptr<Type> type_; // owned; dtype/shape info
std::string doc_string_;
std::unique_ptr<Tensor> initializer_; // owned; non-null for weights/constants
std::vector<Node*> users_; // consumers (non-owning)
Node* producer_ = nullptr; // producing node (non-owning)
Value* grad_ = nullptr; // gradient value, if generated (non-owning)
int counter_ = 0;
};
compiler/value.h
compiler/value.h
è
–
// A constant tensor (e.g. an initializer). The raw buffer is malloc-allocated
// and released with std::free via the unique_ptr custom deleter.
class Tensor {
public:
typedef std::unique_ptr<void, decltype(&std::free)> UniqueData;
explicit Tensor(const onnx::TensorProto& xtensor);
private:
std::vector<int64_t> dims_; // tensor shape
Dtype dtype_;
UniqueData data_; // owned raw buffer, freed with std::free
std::string name_;
std::string doc_string_;
};
compiler/tensor.h
compiler/tensor.h
è
è
–
–
•
// Runs type inference for a single node. Currently only dtype inference is
// performed here; shape inference is not done in this function.
void InferDtypeAndShape(Node* node) {
InferDtype(node);
}
// Runs type inference over the whole graph in topological order, so each
// node sees the already-inferred types of its inputs.
void InferAllDtypeAndShape(Graph* graph) {
for (Node* node : graph->GetTopologicallySortedNodes()) {
InferDtypeAndShape(node);
}
}
compiler/type_inference.cc
compiler/type_inference.cc
è
// Returns the promoted dtype for combining two operand dtypes.
// The checks are order sensitive:
//  - identical dtypes pass through;
//  - kUnknown poisons the result;
//  - a float type wins over a non-float type;
//  - otherwise the wider type wins;
//  - bool loses to any other same-size type;
//  - uint8 mixed with a same-size signed type widens to int16.
// NOTE(review): control falls off the end after CHECK(false); presumably
// CHECK aborts ([[noreturn]]) — confirm, otherwise this is UB.
Dtype CoerceDtype(Dtype dtype0, Dtype dtype1) {
if (dtype0 == dtype1) return dtype0;
if (dtype0 == Dtype::kUnknown || dtype1 == Dtype::kUnknown) return Dtype::kUnknown;
if (dtype0.IsFloat() && !dtype1.IsFloat()) return dtype0;
if (!dtype0.IsFloat() && dtype1.IsFloat()) return dtype1;
if (dtype0.SizeOf() > dtype1.SizeOf()) return dtype0;
if (dtype0.SizeOf() < dtype1.SizeOf()) return dtype1;
if (dtype1 == Dtype::kBool) return dtype0;
if (dtype0 == Dtype::kBool) return dtype1;
if (dtype0 == Dtype::kUInt8 || dtype1 == Dtype::kUInt8) return Dtype::kInt16;
CHECK(false) << "Unknown type coerce: " << dtype0.ToString() << " vs " << dtype1.ToString();
}
void InferDtype(Node* node) {
…
case Node::kConv: case Node::kConvTranspose: case Node::kChainerConvGradWeight: {
Dtype dtype = CoerceDtype(in0, in1);
if (node->inputs().size() >= 3) dtype = CoerceDtype(dtype, node->input(2)->type().dtype());
oset(0, dtype);
break;
}
…
}
compiler/dtype_inference.cc
è
// Repeatedly rewrites nodes using per-op simplifier callbacks until no
// rewrite fires (a rewrite can expose further opportunities).
// NOTE(review): slide excerpt — `replaced` is declared in the elided part.
void Simplify(const CompilerConfig& ccfg, Graph* graph, bool gen_backprop) {
std::map<Node::OpType, SimplifierFn> simplifiers;
/* Register a simplifier for each op type */
CHECK(simplifiers.emplace(Node::kConv, ReplaceConv).second);
while (replaced) {
replaced = false;
for (Node* node : graph->GetLiveNodes()) {
auto found = simplifiers.find(node->op_type());
if (found == simplifiers.end()) continue;
if (found->second(graph, node)) {
// The simplifier emitted replacement nodes; drop the original.
graph->DetachNode(node);
replaced = true;
}
}
}
}
compiler/simplifier.cc
è
// Rewrites a grouped convolution (group > 1) into Split -> per-group Conv ->
// Concat along the channel axis. Returns false (no rewrite) for group == 1.
bool ReplaceConv(Graph* graph, Node* node) {
CHECK_LT(0, node->group());
if (node->group() == 1) return false;
GraphBuilder gb(graph, "SimplifyConvGroup", node->output(0));
// Split the input.
std::vector<Value*> inputs;
for (int i = 0; i < node->group(); ++i) {
inputs.push_back(gb.Temp());
}
gb.MOp(Node::kSplit, {node->input(0)}, inputs)->set_axis(1);
/* Split weight and bias, and collect per-group outputs (elided) */
gb.Op(Node::kConcat, outputs, node->output(0))->producer()->set_axis(1);
return true;
}
compiler/simplifier.cc
compiler/simplifier.cc
è
–
// Evaluates a node whose inputs are all constants and replaces each of its
// outputs with a Constant node holding the computed tensor.
void DoConstantPropagation(Graph* graph, Node* node) {
/* Collect the node's constant inputs (elided) */
for (size_t i = 0; i < next_values.size(); ++i) {
auto& next_value = next_values[i];
GraphBuilder gb(graph, "Const", node->output(i));
if (next_value->is_tensor()) {
gb.Op(Node::kConstant, {}, node->output(i))->producer()->set_tensor_value(next_value->ReleaseTensor());
}
}
/* Detach the node that was replaced (elided) */
}
// Fixed-point driver: keeps folding until no node with constant-only inputs
// can be propagated any further.
void PropagateConstants(Graph* graph) {
bool replaced = true;
while (replaced) {
replaced = false;
for (Node* node : graph->GetLiveNodes()) {
if (!HasConstantInputsOnly(*node)) continue;
if (MaybePropagateConstant(graph, node)) { replaced = true; }
}
}
}
compiler/constant_propagation.cc
compiler/constant_propagation.cc
è
–
// Fuses connected runs (length >= 2) of element-wise ops into a single
// "nvrtc" fusion group so they can be compiled into one kernel.
void FuseElementwiseOperations(Graph* graph) {
const std::set<Node::OpType> fusable_ops = {
Node::kIdentity,
Node::kAdd,
/* ... */
};
auto is_fusable = [&fusable_ops](const Node& node) {
/* ... */
};
FuseAllConnectedNodes("nvrtc", graph, 2, is_fusable);
}
// Dispatches to the enabled fusion backends; element-wise (NVRTC) fusion
// always runs last.
void FuseOperations(Graph* graph, bool use_tvm, bool use_ngraph) {
/* Fuse subgraphs (elided) */
if (use_ngraph) { FuseNGraphOperations(graph); }
if (use_tvm) { FuseTVMOperations(graph); }
FuseElementwiseOperations(graph);
}
compiler/fusion.cc
compiler/fusion.cc
è
–
•
–
–
•
è
–
è
–
è
–
è
–
è
–
è
–
è
–
è
–
è
è
è
–
è
–
// Executes the compiled XCVM program on the given inputs and returns the
// produced outputs.
InOuts Run(const InOuts& inputs) {
if (trace_level()) std::cerr << "Running XCVM..." << std::endl;
InOuts outputs = xcvm_->Run(inputs, xcvm_opts_);
/* ... */
return outputs;
}
tools/run_onnx.cc
// Main interpreter loop: fetch the instruction at the program counter and
// run it until the PC runs past the end of the program. Exceptions are
// annotated with the failing op's debug info and rethrown.
// NOTE(review): slide excerpt — closing braces are truncated.
void XCVM::Run(XCVMState* state) {
/* Initialize the state (elided) */
while (true) {
int pc = state->pc();
if (pc >= program_.size()) break;
XCVMOp* op = program_[pc].get();
try {
op->Run(state);
} catch (...) {
std::cerr << "Exception in " << op->debug_info() << std::endl;
throw;
}
runtime/xcvm.cc
è
–
// Abstract base for one XCVM instruction; concrete ops are generated
// (see build/runtime/gen_xcvm_ops.*) and implement Run().
class XCVMOp {
public:
explicit XCVMOp(const XCInstructionProto& inst);
virtual void Run(XCVMState* state) = 0;
protected:
XCInstructionProto inst_; // the serialized instruction this op was built from
const int64_t id_;
const XCInstructionProto::Op op_;
const std::string name_;
};
compiler/xcvm/xcvm_op.h class ConvOp : public XCVMOp {
public:
explicit ConvOp(const XCInstructionProto& inst);
chainerx::Array RunImpl(XCVMState* st, const chainerx::Array& x,
const chainerx::Array& w, const nonstd::optional<chainerx::Array>& b);
virtual void Run(XCVMState* st);
private:
// x/w/b/y are indices of arrays in the XCVMState (see generated Run()).
int x; // input
int w; // weight
int b; // optional bias
chainerx::StackVector<int64_t, chainerx::kMaxNdim> strides;
chainerx::StackVector<int64_t, chainerx::kMaxNdim> pads;
int y; // output
};
build/runtime/gen_xcvm_ops.h
build/runtime/gen_xcvm_ops.h
è
–
è
–
// Generated glue: fetch the operand arrays from the state by index, run the
// kernel, and store the result back into the state.
void ConvOp::Run(XCVMState* st) {
/* ... */
st->SetArray(y, RunImpl(st, st->GetArray(x), st->GetArray(w), st->GetOptionalArray(b)));
/* ... */
}
build/runtime/gen_xcvm_ops.cc
// Delegates to chainerx::Conv. ComplementStride/ComplementPad presumably fill
// in default strides/pads to match x's spatial rank when the instruction left
// them empty — confirm against their definitions.
chainerx::Array ConvOp::RunImpl(
XCVMState* st, const chainerx::Array& x, const chainerx::Array& w, const nonstd::optional<chainerx::Array>& b)
{
return chainerx::Conv(x, w, b, ComplementStride(strides, x), ComplementPad(pads, x));
}
build/runtime/gen_xcvm_ops.cc
build/runtime/gen_xcvm_ops.cc

Chainer-Compiler 動かしてみた

  • 8.
    è – è – $ docker build -f docker/Dockerfile.tmpl -t chainer-compiler:cuda-10.0 . $ nvidia-docker run -i -t --cap-add=SYS_PTRACE --security-opt="seccomp=unconfined" \ chainer-compiler:cuda-10.0 /bin/zsh
  • 9.
    è =================================== FAILURES =================================== ____________________________TestPReLU.test_output _____________________________ self = <tests.functions_tests.test_activations.TestPReLU testMethod=test_output> def test_output(self): > self.expect(self.model, self.x) third_party/onnx-chainer/tests/functions_tests/test_activations.py:61: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ third_party/onnx-chainer/tests/helper.py:106: in expect self.check_out_values(test_path, input_names=graph_input_names) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ test_path = 'out/opset7/test_prelu', input_names = ['Input_0'] def check_model_expect(test_path, input_names=None): if not ONNXRUNTIME_AVAILABLE: > raise ImportError('ONNX Runtime is not found on checking module.') E ImportError: ONNX Runtime is not found on checking module. third_party/onnx-chainer/onnx_chainer/testing/test_onnxruntime.py:39: ImportError
  • 10.
  • 11.
    è è – – $ ./setup.sh $ ./build/tools/run_onnx --device cuda --test data/resnet50 --trace $ ./build/tools/run_onnx --device cuda --test data/mnist --trace
  • 12.
    è è – – Check `chainerx::Shape(type.shape().begin(), type.shape().end())' == `a.shape()' failed! in CheckType at ../runtime/xcvm.cc:57: ((1, 8, 28, 28) vs (1, 8, 24, 24)) Shape check failed in output #0: Conv(Input3, Parameter5) -> (Convolution28_Output_0) zsh: abort ./build/tools/run_onnx --device cuda --test data/mnist --trace name: "Convolution28" op_type: "Conv" Attribute { name: "auto_pad" s: "SAME_UPPER" type: STRING }
  • 13.
    auto pads =[&node]() { std::vector<int64_t> pads = node.pads(); + + // Complement from auto_pad + if (pads.size() == 0) { + if (node.auto_pad() == "SAME_UPPER") { + const Value* weight = node.input(1); + const int pad_ndim = (weight->type().ndim() - 2)*2; + CHECK_GT(pad_ndim, 0) << weight->type().DebugString(); + pads.resize(pad_ndim); + for (int i = 0; i < pad_ndim/2; ++i) { + pads[i] = pads[i+pad_ndim/2] = weight->type().dims()[i+2] / 2; + } + } + } compiler/emitter.cc:167 Verifying the result... OK: Plus214_Output_0 Elapsed: 23.395 msec OK!
  • 15.
    void RunMain(const std::vector<std::string>&argv) { LOG() << "Loading model..." << std::endl; onnx::ModelProto xmodel( LoadLargeProto<onnx::ModelProto>(onnx_path)); // ONNXモデルのロード Model model(xmodel); // onnx::ModelProtoからModelを構築 /* ... */ ModelRunner model_runner(args, initial_free_bytes, &model); // Modelに対してパスを実行しXCVMに変換 /* ... */ for (const std::unique_ptr<TestCase>& test_case : test_cases) { LOG() << "Running for " << test_case->name << std::endl; InOuts inputs(model_runner.params()); InOuts outputs(model_runner.Run(inputs)); // XCVMを用いてモデルを実行 } /* ... */ } tools/run_onnx.cc
  • 16.
    è class ModelRunner { public: ModelRunner(constcmdline::parser& args, int64_t initial_free_bytes, Model* model) : model_(model), args_(args), initial_free_bytes_(initial_free_bytes) { if (args.exist("backprop_two_phase")) { /* ... */ } else { LOG() << "Constructing model..." << std::endl; RunDefaultPasses(model->mutable_graph(), args_.exist("backprop")); // 各種パスを適用しLowering CompileModel(model, &xcvm_); // XCVMに変換 } /* ... */ } tools/run_onnx.cc
  • 17.
    è void CompileModel(Model* model,std::unique_ptr<XCVM>* xcvm, const char* name = nullptr, bool gen_backprop = false) { std::string out_onnx = args_.get<std::string>("out_onnx"); if (!out_onnx.empty()) { onnx::ModelProto xmodel; model->ToONNX(&xmodel); std::ofstream ofs(out_onnx); CHECK(xmodel.SerializeToOstream(&ofs)); // ONNXのシリアライズ出力 } LOG() << "Generate code..." << std::endl; XCProgramProto xcvm_prog; xcvm::Emit(*model, &xcvm_prog, trace_level() > 0); // ModelをXCVMProgramProtoに変換 const std::string out_xcvm = args_.get<std::string>("out_xcvm"); if (!out_xcvm.empty()) { std::ofstream ofs(out_xcvm); CHECK(ofs) << "Failed to open output XCVM: " << out_xcvm; CHECK(xcvm_prog.SerializeToOstream(&ofs)); // XCVMのシリアライズ出力 } xcvm->reset(new XCVM(xcvm_prog)); // XCVMProgramProtoをXCVMでラップ } tools/run_onnx.cc
  • 19.
  • 20.
    è – class Model { public: explicitModel(const onnx::ModelProto& xmodel); private: int64_t ir_version_; std::vector<onnx::OperatorSetIdProto> opset_import_; std::string producer_name_; std::string producer_version_; std::string domain_; int64_t model_version_; std::string doc_string_; std::map<std::string, std::string> metadata_props_; std::unique_ptr<Graph> graph_; }; compiler/model.h
  • 21.
    è – class Graph { public: explicitGraph(const onnx::GraphProto& xgraph); private: std::vector<Value*> output_values_; std::vector<Value*> input_values_; std::vector<Value*> temp_values_; std::vector<std::unique_ptr<Value>> all_values_; std::vector<Node*> nodes_; std::vector<std::unique_ptr<Node>> nodes_buf_; std::string name_; std::string doc_string_; std::map<std::string, int> ids_; }; compiler/graph.h
  • 22.
    è – class Node :public NodeBase { public: Node(const onnx::NodeProto& xnode, const std::vector<Value*>& inputs, const std::vector<Value*>& outputs); Node(const std::string& name, OpType op_type, const std::vector<Value*>& inputs, const std::vector<Value*>& outputs); private: std::vector<Value*> inputs_; std::vector<Value*> outputs_; std::string name_; std::string domain_; std::string doc_string_; bool detached_ = false; }; compiler/node.h
  • 23.
    è – class NodeBase { public: enumOpType { kIdentity, kNeg, ... }; protected: std::vector<float> activation_alpha_; bool was_activation_alpha_set_ = false; std::vector<float> activation_beta_; ... }; build/compiler/gen_node_base.h class NodeDef(object): def __init__(self, op_type, num_inputs, num_outputs, **kwargs): self.op_type = op_type self.num_inputs = num_inputs self.num_outputs = num_outputs self.attributes = kwargs self.attributes.update(CHAINER_COMPILERX_GLOBAL_ATTRS) self.attr_defs = {} # To be filled after parsed. NODES.append(self) NodeDef('Identity', 1, 1) NodeDef('Neg', 1, 1) … def gen_gen_node_base_h(): public_lines = [] private_lines = [] public_lines.append('enum OpType {‘) for node in NODES: public_lines.append('k%s,' % (node.op_type)) … compiler/gen_node.py
  • 24.
    è – class Value { public: enumKind { kTemp = 0, kInput = 1, kOutput = 2, kNull = 4 }; Value(const onnx::ValueInfoProto& xvalue, Kind kind); private: Kind kind_{Kind::kTemp}; std::string name_; std::unique_ptr<Type> type_; std::string doc_string_; std::unique_ptr<Tensor> initializer_; std::vector<Node*> users_; Node* producer_ = nullptr; Value* grad_ = nullptr; int counter_ = 0; }; compiler/value.h
  • 25.
    è – class Tensor { public: typedefstd::unique_ptr<void, decltype(&std::free)> UniqueData; explicit Tensor(const onnx::TensorProto& xtensor); private: std::vector<int64_t> dims_; Dtype dtype_; UniqueData data_; std::string name_; std::string doc_string_; }; compiler/tensor.h
  • 27.
  • 28.
    è – – • void InferDtypeAndShape(Node* node){ InferDtype(node); } void InferAllDtypeAndShape(Graph* graph) { for (Node* node : graph->GetTopologicallySortedNodes()) { InferDtypeAndShape(node); } } compiler/type_inference.cc
  • 29.
    è Dtype CoerceDtype(Dtype dtype0,Dtype dtype1) { if (dtype0 == dtype1) return dtype0; if (dtype0 == Dtype::kUnknown || dtype1 == Dtype::kUnknown) return Dtype::kUnknown; if (dtype0.IsFloat() && !dtype1.IsFloat()) return dtype0; if (!dtype0.IsFloat() && dtype1.IsFloat()) return dtype1; if (dtype0.SizeOf() > dtype1.SizeOf()) return dtype0; if (dtype0.SizeOf() < dtype1.SizeOf()) return dtype1; if (dtype1 == Dtype::kBool) return dtype0; if (dtype0 == Dtype::kBool) return dtype1; if (dtype0 == Dtype::kUInt8 || dtype1 == Dtype::kUInt8) return Dtype::kInt16; CHECK(false) << "Unknown type coerce: " << dtype0.ToString() << " vs " << dtype1.ToString(); } void InferDtype(Node* node) { … case Node::kConv: case Node::kConvTranspose: case Node::kChainerConvGradWeight: { Dtype dtype = CoerceDtype(in0, in1); if (node->inputs().size() >= 3) dtype = CoerceDtype(dtype, node->input(2)->type().dtype()); oset(0, dtype); break; } … } compiler/dtype_inference.cc
  • 30.
    è void Simplify(const CompilerConfig&ccfg, Graph* graph, bool gen_backprop) { std::map<Node::OpType, SimplifierFn> simplifiers; /* 各Nodeのsimplifierの登録 */ CHECK(simplifiers.emplace(Node::kConv, ReplaceConv).second); while (replaced) { replaced = false; for (Node* node : graph->GetLiveNodes()) { auto found = simplifiers.find(node->op_type()); if (found == simplifiers.end()) continue; if (found->second(graph, node)) { graph->DetachNode(node); replaced = true; } } } } compiler/simplifier.cc
  • 31.
    è bool ReplaceConv(Graph* graph,Node* node) { CHECK_LT(0, node->group()); if (node->group() == 1) return false; GraphBuilder gb(graph, "SimplifyConvGroup", node->output(0)); // Split the input. std::vector<Value*> inputs; for (int i = 0; i < node->group(); ++i) { inputs.push_back(gb.Temp()); } gb.MOp(Node::kSplit, {node->input(0)}, inputs)->set_axis(1); /* weight, bias, outputのsplit処理 */ gb.Op(Node::kConcat, outputs, node->output(0))->producer()->set_axis(1); return true; } compiler/simplifier.cc
  • 32.
    è – void DoConstantPropagation(Graph* graph,Node* node) { /* Nodeの入力を集める処理 */ for (size_t i = 0; i < next_values.size(); ++i) { auto& next_value = next_values[i]; GraphBuilder gb(graph, "Const", node->output(i)); if (next_value->is_tensor()) { gb.Op(Node::kConstant, {}, node->output(i))->producer()->set_tensor_value(next_value->ReleaseTensor()); } } /* 置き換え前NodeのDetach */ } void PropagateConstants(Graph* graph) { bool replaced = true; while (replaced) { replaced = false; for (Node* node : graph->GetLiveNodes()) { if (!HasConstantInputsOnly(*node)) continue; if (MaybePropagateConstant(graph, node)) { replaced = true; } } } } compiler/constant_propagation.cc
  • 33.
    è – void FuseElementwiseOperations(Graph* graph){ const std::set<Node::OpType> fusable_ops = { Node::kIdentity, Node::kAdd, /* ... */ }; auto is_fusable = [&fusable_ops](const Node& node) { /* ... */ }; FuseAllConnectedNodes("nvrtc", graph, 2, is_fusable); } void FuseOperations(Graph* graph, bool use_tvm, bool use_ngraph) { /* subgraphのfusion */ if (use_ngraph) { FuseNGraphOperations(graph); } if (use_tvm) { FuseTVMOperations(graph); } FuseElementwiseOperations(graph); } compiler/fusion.cc
  • 35.
  • 36.
  • 37.
  • 38.
  • 39.
    è – è – InOuts Run(const InOuts&inputs) { if (trace_level()) std::cerr << "Running XCVM..." << std::endl; InOuts outputs = xcvm_->Run(inputs, xcvm_opts_); /* ... */ return outputs; } compiler/run_onnx.cc void XCVM::Run(XCVMState* state) { /* Stateの初期化 */ while (true) { int pc = state->pc(); if (pc >= program_.size()) break; XCVMOp* op = program_[pc].get(); try { op->Run(state); } catch (...) { std::cerr << "Exception in " << op->debug_info() << std::endl; throw; } compiler/run_onnx.cc
  • 40.
    è – class XCVMOp { public: explicitXCVMOp(const XCInstructionProto& inst); virtual void Run(XCVMState* state) = 0; protected: XCInstructionProto inst_; const int64_t id_; const XCInstructionProto::Op op_; const std::string name_; }; compiler/xcvm/xcvm_op.h class ConvOp : public XCVMOp { public: explicit ConvOp(const XCInstructionProto& inst); chainerx::Array RunImpl(XCVMState* st, const chainerx::Array& x, const chainerx::Array& w, const nonstd::optional<chainerx::Array>& b); virtual void Run(XCVMState* st); private: int x; int w; int b; chainerx::StackVector<int64_t, chainerx::kMaxNdim> strides; chainerx::StackVector<int64_t, chainerx::kMaxNdim> pads; int y; }; build/runtime/gen_xcvm_ops.h
  • 41.
    è – è – void ConvOp::Run(XCVMState* st){ /* ... */ st->SetArray(y, RunImpl(st, st->GetArray(x), st->GetArray(w), st->GetOptionalArray(b))); /* ... */ } build/runtime/gen_xcvm_ops.cc chainerx::Array ConvOp::RunImpl( XCVMState* st, const chainerx::Array& x, const chainerx::Array& w, const nonstd::optional<chainerx::Array>& b) { return chainerx::Conv(x, w, b, ComplementStride(strides, x), ComplementPad(pads, x)); } build/runtime/gen_xcvm_ops.cc