
Chainer-Compiler 動かしてみた (Trying out Chainer-Compiler)

Presentation slides from the "Chainer Compiler のソースコードをグダグダ語る会" (a casual Chainer Compiler source-code reading meetup) held at DeNA on 2019/4/22.



  1. Building and starting the Docker image:
     $ docker build -f docker/Dockerfile.tmpl -t chainer-compiler:cuda-10.0 .
     $ nvidia-docker run -i -t --cap-add=SYS_PTRACE --security-opt="seccomp=unconfined" \
         chainer-compiler:cuda-10.0 /bin/zsh
  2. The test suite initially fails because ONNX Runtime is not installed:
     =================================== FAILURES ===================================
     ____________________________ TestPReLU.test_output _____________________________
     self = <tests.functions_tests.test_activations.TestPReLU testMethod=test_output>
         def test_output(self):
     >       self.expect(self.model, self.x)
     third_party/onnx-chainer/tests/functions_tests/test_activations.py:61:
     third_party/onnx-chainer/tests/helper.py:106: in expect
         self.check_out_values(test_path, input_names=graph_input_names)
     test_path = 'out/opset7/test_prelu', input_names = ['Input_0']
         def check_model_expect(test_path, input_names=None):
             if not ONNXRUNTIME_AVAILABLE:
     >           raise ImportError('ONNX Runtime is not found on checking module.')
     E           ImportError: ONNX Runtime is not found on checking module.
     third_party/onnx-chainer/onnx_chainer/testing/test_onnxruntime.py:39: ImportError
  3. (bullet-only slide; text was not captured in the export)
  4. Running the bundled test models through run_onnx:
     $ ./setup.sh
     $ ./build/tools/run_onnx --device cuda --test data/resnet50 --trace
     $ ./build/tools/run_onnx --device cuda --test data/mnist --trace
  5. The MNIST model aborts on a shape check: its Conv node specifies padding via auto_pad rather than explicit pads.
     Check `chainerx::Shape(type.shape().begin(), type.shape().end())' == `a.shape()' failed!
     in CheckType at ../runtime/xcvm.cc:57: ((1, 8, 28, 28) vs (1, 8, 24, 24))
     Shape check failed in output #0: Conv(Input3, Parameter5) -> (Convolution28_Output_0)
     zsh: abort  ./build/tools/run_onnx --device cuda --test data/mnist --trace
     name: "Convolution28"
     op_type: "Conv"
     Attribute {
       name: "auto_pad"
       s: "SAME_UPPER"
       type: STRING
     }
  6. Complementing pads from auto_pad makes the MNIST test pass (patch around compiler/emitter.cc:167):
         auto pads = [&node]() {
             std::vector<int64_t> pads = node.pads();
     +
     +       // Complement from auto_pad
     +       if (pads.size() == 0) {
     +           if (node.auto_pad() == "SAME_UPPER") {
     +               const Value* weight = node.input(1);
     +               const int pad_ndim = (weight->type().ndim() - 2) * 2;
     +               CHECK_GT(pad_ndim, 0) << weight->type().DebugString();
     +               pads.resize(pad_ndim);
     +               for (int i = 0; i < pad_ndim / 2; ++i) {
     +                   pads[i] = pads[i + pad_ndim / 2] = weight->type().dims()[i + 2] / 2;
     +               }
     +           }
     +       }
     Verifying the result... OK: Plus214_Output_0
     Elapsed: 23.395 msec
     OK!
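     As a sanity check on the patch above, a standalone sketch of the same pads complement, assuming Convolution28 uses a 5x5 kernel with stride 1 (consistent with the 28x28 -> 24x24 shrink in the failing run); this is illustration only, not compiler code:
         // SAME_UPPER with stride 1 pads each spatial dimension by kernel_size / 2 on both sides.
         #include <cassert>
         #include <cstdint>
         #include <vector>

         std::vector<int64_t> ComplementSameUpperPads(const std::vector<int64_t>& weight_dims) {
             // weight_dims = (out_channels, in_channels, k0, k1, ...)
             const int spatial_ndim = static_cast<int>(weight_dims.size()) - 2;
             std::vector<int64_t> pads(spatial_ndim * 2);
             for (int i = 0; i < spatial_ndim; ++i) {
                 pads[i] = pads[i + spatial_ndim] = weight_dims[i + 2] / 2;
             }
             return pads;
         }

         int main() {
             // Assumed Convolution28 weight: 8 output channels, 1 input channel, 5x5 kernel.
             std::vector<int64_t> pads = ComplementSameUpperPads({8, 1, 5, 5});
             assert((pads == std::vector<int64_t>{2, 2, 2, 2}));
             // With pad 2 on each side: 28 - 5 + 2 * 2 + 1 == 28, so the output stays
             // 28x28 instead of shrinking to 24x24 as in the failing run.
             assert(28 - 5 + 2 * 2 + 1 == 28);
             return 0;
         }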
  7. Entry point of run_onnx (tools/run_onnx.cc):
     void RunMain(const std::vector<std::string>& argv) {
         LOG() << "Loading model..." << std::endl;
         onnx::ModelProto xmodel(LoadLargeProto<onnx::ModelProto>(onnx_path));  // Load the ONNX model
         Model model(xmodel);  // Build a Model from the onnx::ModelProto
         /* ... */
         ModelRunner model_runner(args, initial_free_bytes, &model);  // Run passes over the Model and convert it to XCVM
         /* ... */
         for (const std::unique_ptr<TestCase>& test_case : test_cases) {
             LOG() << "Running for " << test_case->name << std::endl;
             InOuts inputs(model_runner.params());
             InOuts outputs(model_runner.Run(inputs));  // Execute the model on the XCVM
         }
         /* ... */
     }
  8. ModelRunner constructor (tools/run_onnx.cc):
     class ModelRunner {
     public:
         ModelRunner(const cmdline::parser& args, int64_t initial_free_bytes, Model* model)
             : model_(model), args_(args), initial_free_bytes_(initial_free_bytes) {
             if (args.exist("backprop_two_phase")) {
                 /* ... */
             } else {
                 LOG() << "Constructing model..." << std::endl;
                 RunDefaultPasses(model->mutable_graph(), args_.exist("backprop"));  // Apply the passes and lower the graph
                 CompileModel(model, &xcvm_);  // Convert to XCVM
             }
             /* ... */
         }
  9. CompileModel (tools/run_onnx.cc):
     void CompileModel(Model* model, std::unique_ptr<XCVM>* xcvm, const char* name = nullptr, bool gen_backprop = false) {
         std::string out_onnx = args_.get<std::string>("out_onnx");
         if (!out_onnx.empty()) {
             onnx::ModelProto xmodel;
             model->ToONNX(&xmodel);
             std::ofstream ofs(out_onnx);
             CHECK(xmodel.SerializeToOstream(&ofs));  // Serialize the model back to ONNX
         }
         LOG() << "Generate code..." << std::endl;
         XCProgramProto xcvm_prog;
         xcvm::Emit(*model, &xcvm_prog, trace_level() > 0);  // Convert the Model to an XCProgramProto
         const std::string out_xcvm = args_.get<std::string>("out_xcvm");
         if (!out_xcvm.empty()) {
             std::ofstream ofs(out_xcvm);
             CHECK(ofs) << "Failed to open output XCVM: " << out_xcvm;
             CHECK(xcvm_prog.SerializeToOstream(&ofs));  // Serialize the XCVM program
         }
         xcvm->reset(new XCVM(xcvm_prog));  // Wrap the XCProgramProto in an XCVM
     }
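     For reference, the plain protobuf round trip underlying the serialization above looks roughly like this; a minimal sketch assuming the standard onnx/onnx_pb.h header (LoadLargeProto in run_onnx presumably exists to handle models beyond protobuf's default size limits):
         // Load a serialized ONNX model, inspect it, and write it back out.
         #include <fstream>
         #include <iostream>
         #include <onnx/onnx_pb.h>

         int main() {
             onnx::ModelProto model;

             // Parse a serialized ModelProto from disk.
             std::ifstream ifs("model.onnx", std::ios::binary);
             if (!model.ParseFromIstream(&ifs)) {
                 std::cerr << "Failed to parse model.onnx" << std::endl;
                 return 1;
             }
             std::cout << "producer: " << model.producer_name()
                       << " nodes: " << model.graph().node_size() << std::endl;

             // Write it back out, as CompileModel does for --out_onnx.
             std::ofstream ofs("out.onnx", std::ios::binary);
             return model.SerializeToOstream(&ofs) ? 0 : 1;
         }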
  10. (bullet-only slide; text was not captured in the export)
  11. The Model class, built from an onnx::ModelProto (compiler/model.h):
      class Model {
      public:
          explicit Model(const onnx::ModelProto& xmodel);

      private:
          int64_t ir_version_;
          std::vector<onnx::OperatorSetIdProto> opset_import_;
          std::string producer_name_;
          std::string producer_version_;
          std::string domain_;
          int64_t model_version_;
          std::string doc_string_;
          std::map<std::string, std::string> metadata_props_;
          std::unique_ptr<Graph> graph_;
      };
  12. The Graph class owns the Values and Nodes (compiler/graph.h):
      class Graph {
      public:
          explicit Graph(const onnx::GraphProto& xgraph);

      private:
          std::vector<Value*> output_values_;
          std::vector<Value*> input_values_;
          std::vector<Value*> temp_values_;
          std::vector<std::unique_ptr<Value>> all_values_;
          std::vector<Node*> nodes_;
          std::vector<std::unique_ptr<Node>> nodes_buf_;
          std::string name_;
          std::string doc_string_;
          std::map<std::string, int> ids_;
      };
  13. The Node class, one operator with its input and output Values (compiler/node.h):
      class Node : public NodeBase {
      public:
          Node(const onnx::NodeProto& xnode, const std::vector<Value*>& inputs, const std::vector<Value*>& outputs);
          Node(const std::string& name, OpType op_type, const std::vector<Value*>& inputs, const std::vector<Value*>& outputs);

      private:
          std::vector<Value*> inputs_;
          std::vector<Value*> outputs_;
          std::string name_;
          std::string domain_;
          std::string doc_string_;
          bool detached_ = false;
      };
  14. NodeBase (the op-type enum and per-op attributes) is generated from NodeDef declarations by compiler/gen_node.py:
      class NodeBase {
      public:
          enum OpType {
              kIdentity,
              kNeg,
              ...
          };

      protected:
          std::vector<float> activation_alpha_;
          bool was_activation_alpha_set_ = false;
          std::vector<float> activation_beta_;
          ...
      };
      build/compiler/gen_node_base.h

      class NodeDef(object):
          def __init__(self, op_type, num_inputs, num_outputs, **kwargs):
              self.op_type = op_type
              self.num_inputs = num_inputs
              self.num_outputs = num_outputs
              self.attributes = kwargs
              self.attributes.update(CHAINER_COMPILERX_GLOBAL_ATTRS)
              self.attr_defs = {}  # To be filled after parsed.
              NODES.append(self)

      NodeDef('Identity', 1, 1)
      NodeDef('Neg', 1, 1)
      ...

      def gen_gen_node_base_h():
          public_lines = []
          private_lines = []
          public_lines.append('enum OpType {')
          for node in NODES:
              public_lines.append('k%s,' % (node.op_type))
          ...
      compiler/gen_node.py
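      The same "declare once, generate the enum plus metadata" idea can be sketched in plain C++ with an X-macro; this only illustrates the technique, it is not how gen_node.py actually emits gen_node_base.h:
          // Generate an OpType enum and a name table from a single op list.
          #include <cassert>
          #include <cstring>

          #define OP_LIST(X) \
              X(Identity)    \
              X(Neg)         \
              X(Conv)

          enum OpType {
          #define DECLARE_ENUM(name) k##name,
              OP_LIST(DECLARE_ENUM)
          #undef DECLARE_ENUM
          };

          const char* OpTypeName(OpType op) {
              switch (op) {
          #define NAME_CASE(name) case k##name: return #name;
                  OP_LIST(NAME_CASE)
          #undef NAME_CASE
              }
              return "?";
          }

          int main() {
              assert(std::strcmp(OpTypeName(kConv), "Conv") == 0);
              return 0;
          }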
  15. The Value class records its producer and users, forming the def-use chains (compiler/value.h):
      class Value {
      public:
          enum Kind { kTemp = 0, kInput = 1, kOutput = 2, kNull = 4 };
          Value(const onnx::ValueInfoProto& xvalue, Kind kind);

      private:
          Kind kind_{Kind::kTemp};
          std::string name_;
          std::unique_ptr<Type> type_;
          std::string doc_string_;
          std::unique_ptr<Tensor> initializer_;
          std::vector<Node*> users_;
          Node* producer_ = nullptr;
          Value* grad_ = nullptr;
          int counter_ = 0;
      };
  16. The Tensor class holds initializer data (compiler/tensor.h):
      class Tensor {
      public:
          typedef std::unique_ptr<void, decltype(&std::free)> UniqueData;
          explicit Tensor(const onnx::TensorProto& xtensor);

      private:
          std::vector<int64_t> dims_;
          Dtype dtype_;
          UniqueData data_;
          std::string name_;
          std::string doc_string_;
      };
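      Roughly, Graph owns the Values and Nodes, and each Value records its producer and users, which is what the passes below traverse. A much-simplified sketch of that wiring, with hypothetical types rather than the compiler's classes:
          #include <cassert>
          #include <memory>
          #include <string>
          #include <vector>

          struct Node;

          struct Value {
              std::string name;
              Node* producer = nullptr;   // the node that outputs this value
              std::vector<Node*> users;   // nodes that consume this value
          };

          struct Node {
              std::string op_type;
              std::vector<Value*> inputs;
              std::vector<Value*> outputs;
          };

          struct Graph {
              std::vector<std::unique_ptr<Value>> values;
              std::vector<std::unique_ptr<Node>> nodes;

              Value* AddValue(const std::string& name) {
                  values.push_back(std::make_unique<Value>());
                  values.back()->name = name;
                  return values.back().get();
              }

              Node* AddNode(const std::string& op, std::vector<Value*> ins, std::vector<Value*> outs) {
                  nodes.push_back(std::make_unique<Node>());
                  Node* n = nodes.back().get();
                  n->op_type = op;
                  n->inputs = ins;
                  n->outputs = outs;
                  for (Value* v : ins) v->users.push_back(n);  // maintain the use lists
                  for (Value* v : outs) v->producer = n;       // maintain the def link
                  return n;
              }
          };

          int main() {
              Graph g;
              Value* x = g.AddValue("x");
              Value* w = g.AddValue("w");
              Value* y = g.AddValue("y");
              Node* conv = g.AddNode("Conv", {x, w}, {y});
              assert(y->producer == conv && x->users.size() == 1);
              return 0;
          }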
  17. (bullet-only slide; text was not captured in the export)
  18. Type inference walks the nodes in topological order; as the body shows, it currently only infers dtypes (compiler/type_inference.cc):
      void InferDtypeAndShape(Node* node) {
          InferDtype(node);
      }

      void InferAllDtypeAndShape(Graph* graph) {
          for (Node* node : graph->GetTopologicallySortedNodes()) {
              InferDtypeAndShape(node);
          }
      }
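      GetTopologicallySortedNodes is what lets a single forward pass see fully-typed inputs. A minimal sketch of that kind of ordering, using Kahn's algorithm over a toy dependency list rather than the compiler's implementation:
          #include <cassert>
          #include <queue>
          #include <utility>
          #include <vector>

          std::vector<int> TopologicalOrder(int n, const std::vector<std::pair<int, int>>& edges) {
              std::vector<std::vector<int>> succ(n);
              std::vector<int> indeg(n, 0);
              for (auto [from, to] : edges) {
                  succ[from].push_back(to);
                  ++indeg[to];
              }
              std::queue<int> ready;
              for (int i = 0; i < n; ++i)
                  if (indeg[i] == 0) ready.push(i);
              std::vector<int> order;
              while (!ready.empty()) {
                  int node = ready.front();
                  ready.pop();
                  order.push_back(node);
                  for (int s : succ[node])
                      if (--indeg[s] == 0) ready.push(s);
              }
              return order;  // fewer than n entries would mean a cycle
          }

          int main() {
              // 0 -> 1 -> 2 and 0 -> 2: node 2 can only be typed after 0 and 1.
              std::vector<int> order = TopologicalOrder(3, {{0, 1}, {1, 2}, {0, 2}});
              assert((order == std::vector<int>{0, 1, 2}));
              return 0;
          }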
  19. Dtype inference: CoerceDtype decides the result dtype of two operands (compiler/dtype_inference.cc):
      Dtype CoerceDtype(Dtype dtype0, Dtype dtype1) {
          if (dtype0 == dtype1) return dtype0;
          if (dtype0 == Dtype::kUnknown || dtype1 == Dtype::kUnknown) return Dtype::kUnknown;
          if (dtype0.IsFloat() && !dtype1.IsFloat()) return dtype0;
          if (!dtype0.IsFloat() && dtype1.IsFloat()) return dtype1;
          if (dtype0.SizeOf() > dtype1.SizeOf()) return dtype0;
          if (dtype0.SizeOf() < dtype1.SizeOf()) return dtype1;
          if (dtype1 == Dtype::kBool) return dtype0;
          if (dtype0 == Dtype::kBool) return dtype1;
          if (dtype0 == Dtype::kUInt8 || dtype1 == Dtype::kUInt8) return Dtype::kInt16;
          CHECK(false) << "Unknown type coerce: " << dtype0.ToString() << " vs " << dtype1.ToString();
      }

      void InferDtype(Node* node) {
          ...
          case Node::kConv:
          case Node::kConvTranspose:
          case Node::kChainerConvGradWeight: {
              Dtype dtype = CoerceDtype(in0, in1);
              if (node->inputs().size() >= 3) dtype = CoerceDtype(dtype, node->input(2)->type().dtype());
              oset(0, dtype);
              break;
          }
          ...
      }
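      A few concrete coercions the rules above produce: float beats int regardless of width, otherwise the wider type wins, bool defers to the other operand, and uint8 against int8 promotes to int16. A standalone sketch with a toy Dtype enum (not the compiler's Dtype class):
          #include <cassert>

          enum class Dtype { kBool, kUInt8, kInt8, kInt16, kInt32, kInt64, kFloat32, kFloat64 };

          bool IsFloat(Dtype d) { return d == Dtype::kFloat32 || d == Dtype::kFloat64; }

          int SizeOf(Dtype d) {
              switch (d) {
                  case Dtype::kBool: case Dtype::kUInt8: case Dtype::kInt8: return 1;
                  case Dtype::kInt16: return 2;
                  case Dtype::kInt32: case Dtype::kFloat32: return 4;
                  default: return 8;
              }
          }

          Dtype Coerce(Dtype a, Dtype b) {
              if (a == b) return a;
              if (IsFloat(a) && !IsFloat(b)) return a;  // float wins over int
              if (!IsFloat(a) && IsFloat(b)) return b;
              if (SizeOf(a) > SizeOf(b)) return a;      // otherwise the wider type wins
              if (SizeOf(a) < SizeOf(b)) return b;
              if (b == Dtype::kBool) return a;          // bool defers to the other type
              if (a == Dtype::kBool) return b;
              return Dtype::kInt16;                     // uint8 vs int8 promotes to int16
          }

          int main() {
              assert(Coerce(Dtype::kFloat32, Dtype::kInt64) == Dtype::kFloat32);
              assert(Coerce(Dtype::kInt32, Dtype::kInt64) == Dtype::kInt64);
              assert(Coerce(Dtype::kBool, Dtype::kInt32) == Dtype::kInt32);
              assert(Coerce(Dtype::kUInt8, Dtype::kInt8) == Dtype::kInt16);
              return 0;
          }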
  20. The simplifier rewrites registered node types until a fixed point is reached (compiler/simplifier.cc):
      void Simplify(const CompilerConfig& ccfg, Graph* graph, bool gen_backprop) {
          std::map<Node::OpType, SimplifierFn> simplifiers;
          /* Register a simplifier for each node type */
          CHECK(simplifiers.emplace(Node::kConv, ReplaceConv).second);

          bool replaced = true;
          while (replaced) {
              replaced = false;
              for (Node* node : graph->GetLiveNodes()) {
                  auto found = simplifiers.find(node->op_type());
                  if (found == simplifiers.end()) continue;
                  if (found->second(graph, node)) {
                      graph->DetachNode(node);
                      replaced = true;
                  }
              }
          }
      }
  21. Example simplifier: a grouped Conv is replaced by Split -> per-group Convs -> Concat (compiler/simplifier.cc):
      bool ReplaceConv(Graph* graph, Node* node) {
          CHECK_LT(0, node->group());
          if (node->group() == 1) return false;
          GraphBuilder gb(graph, "SimplifyConvGroup", node->output(0));

          // Split the input.
          std::vector<Value*> inputs;
          for (int i = 0; i < node->group(); ++i) {
              inputs.push_back(gb.Temp());
          }
          gb.MOp(Node::kSplit, {node->input(0)}, inputs)->set_axis(1);

          /* Split weight and bias, emit one Conv per group, collect the outputs */
          gb.Op(Node::kConcat, outputs, node->output(0))->producer()->set_axis(1);
          return true;
      }
  22. Constant propagation folds nodes whose inputs are all constants (compiler/constant_propagation.cc):
      void DoConstantPropagation(Graph* graph, Node* node) {
          /* Collect the node's inputs */
          for (size_t i = 0; i < next_values.size(); ++i) {
              auto& next_value = next_values[i];
              GraphBuilder gb(graph, "Const", node->output(i));
              if (next_value->is_tensor()) {
                  gb.Op(Node::kConstant, {}, node->output(i))->producer()->set_tensor_value(next_value->ReleaseTensor());
              }
          }
          /* Detach the replaced node */
      }

      void PropagateConstants(Graph* graph) {
          bool replaced = true;
          while (replaced) {
              replaced = false;
              for (Node* node : graph->GetLiveNodes()) {
                  if (!HasConstantInputsOnly(*node)) continue;
                  if (MaybePropagateConstant(graph, node)) {
                      replaced = true;
                  }
              }
          }
      }
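      The same fixed-point pattern can be sketched on a toy expression: as long as some node has only constant inputs, evaluate it and turn it into a Constant. A hypothetical mini-IR for illustration only:
          #include <cassert>
          #include <string>
          #include <vector>

          struct ToyNode {
              std::string op;           // "Const", "Add" or "Mul"
              std::vector<int> inputs;  // indices of input nodes
              bool is_const = false;
              int value = 0;
          };

          void PropagateConstants(std::vector<ToyNode>& nodes) {
              bool replaced = true;
              while (replaced) {
                  replaced = false;
                  for (ToyNode& n : nodes) {
                      if (n.is_const) continue;
                      bool all_const = true;
                      for (int i : n.inputs)
                          if (!nodes[i].is_const) all_const = false;
                      if (!all_const) continue;
                      // All inputs are constants: evaluate and turn this node into a Const.
                      int v = (n.op == "Add") ? 0 : 1;
                      for (int i : n.inputs) v = (n.op == "Add") ? v + nodes[i].value : v * nodes[i].value;
                      n.op = "Const";
                      n.is_const = true;
                      n.value = v;
                      n.inputs.clear();
                      replaced = true;
                  }
              }
          }

          int main() {
              // Mul(Add(Const 2, Const 3), Const 4) folds to Const 20.
              std::vector<ToyNode> nodes = {
                  {"Const", {}, true, 2},
                  {"Const", {}, true, 3},
                  {"Add", {0, 1}},
                  {"Const", {}, true, 4},
                  {"Mul", {2, 3}},
              };
              PropagateConstants(nodes);
              assert(nodes[4].is_const && nodes[4].value == 20);
              return 0;
          }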
  23. Fusion groups connected fusable nodes into subgraphs for nGraph, TVM, or NVRTC (compiler/fusion.cc):
      void FuseElementwiseOperations(Graph* graph) {
          const std::set<Node::OpType> fusable_ops = {
              Node::kIdentity, Node::kAdd, /* ... */
          };
          auto is_fusable = [&fusable_ops](const Node& node) { /* ... */ };
          FuseAllConnectedNodes("nvrtc", graph, 2, is_fusable);
      }

      void FuseOperations(Graph* graph, bool use_tvm, bool use_ngraph) {
          /* Fuse subgraphs per backend */
          if (use_ngraph) {
              FuseNGraphOperations(graph);
          }
          if (use_tvm) {
              FuseTVMOperations(graph);
          }
          FuseElementwiseOperations(graph);
      }
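      A very rough sketch of the grouping idea behind FuseAllConnectedNodes: collect maximal connected runs of fusable elementwise ops of size at least two, which then become one fused kernel. The toy version below only handles an already ordered chain of ops and is not the compiler's algorithm:
          #include <cassert>
          #include <set>
          #include <string>
          #include <vector>

          std::vector<std::vector<int>> FuseRuns(const std::vector<std::string>& ops,
                                                 const std::set<std::string>& fusable,
                                                 size_t min_size = 2) {
              std::vector<std::vector<int>> groups;
              std::vector<int> current;
              auto flush = [&]() {
                  if (current.size() >= min_size) groups.push_back(current);
                  current.clear();
              };
              for (int i = 0; i < static_cast<int>(ops.size()); ++i) {
                  if (fusable.count(ops[i])) {
                      current.push_back(i);  // extend the current fusable run
                  } else {
                      flush();               // a non-fusable op breaks the run
                  }
              }
              flush();
              return groups;
          }

          int main() {
              std::set<std::string> fusable = {"Add", "Mul", "Relu", "Identity"};
              // Conv -> Add -> Relu -> MaxPool -> Mul: only the Add+Relu run is long enough to fuse.
              auto groups = FuseRuns({"Conv", "Add", "Relu", "MaxPool", "Mul"}, fusable);
              assert(groups.size() == 1);
              assert((groups[0] == std::vector<int>{1, 2}));
              return 0;
          }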
  24.-27. (bullet-only slides; text was not captured in the export)
  28. Execution: ModelRunner::Run hands the inputs to the XCVM, whose main loop dispatches one instruction at a time:
      InOuts Run(const InOuts& inputs) {
          if (trace_level()) std::cerr << "Running XCVM..." << std::endl;
          InOuts outputs = xcvm_->Run(inputs, xcvm_opts_);
          /* ... */
          return outputs;
      }
      tools/run_onnx.cc

      void XCVM::Run(XCVMState* state) {
          /* Initialize the state */
          while (true) {
              int pc = state->pc();
              if (pc >= program_.size()) break;
              XCVMOp* op = program_[pc].get();
              try {
                  op->Run(state);
              } catch (...) {
                  std::cerr << "Exception in " << op->debug_info() << std::endl;
                  throw;
              }
              /* ... */
          }
      }
      runtime/xcvm.cc
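      The runtime is essentially a register-machine interpreter: fetch the instruction at the program counter, dispatch through a virtual Run, repeat. A stripped-down sketch of that loop with hypothetical ops and state (real XCVM ops operate on chainerx arrays):
          #include <cassert>
          #include <memory>
          #include <vector>

          struct State {
              std::vector<double> regs;  // "registers" holding values (arrays in XCVM)
              int pc = 0;
          };

          struct Op {
              virtual ~Op() = default;
              virtual void Run(State* st) = 0;
          };

          struct LoadConstOp : Op {
              int dst; double value;
              LoadConstOp(int d, double v) : dst(d), value(v) {}
              void Run(State* st) override { st->regs[dst] = value; }
          };

          struct MulOp : Op {
              int dst, a, b;
              MulOp(int d, int a_, int b_) : dst(d), a(a_), b(b_) {}
              void Run(State* st) override { st->regs[dst] = st->regs[a] * st->regs[b]; }
          };

          void RunProgram(const std::vector<std::unique_ptr<Op>>& program, State* st) {
              while (true) {
                  int pc = st->pc;
                  if (pc >= static_cast<int>(program.size())) break;  // fell off the program: done
                  program[pc]->Run(st);
                  st->pc = pc + 1;  // a real VM's jump ops would set pc themselves
              }
          }

          int main() {
              std::vector<std::unique_ptr<Op>> program;
              program.push_back(std::make_unique<LoadConstOp>(0, 3.0));
              program.push_back(std::make_unique<LoadConstOp>(1, 4.0));
              program.push_back(std::make_unique<MulOp>(2, 0, 1));
              State st;
              st.regs.resize(3);
              RunProgram(program, &st);
              assert(st.regs[2] == 12.0);
              return 0;
          }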
  29. Each XCVM instruction is an XCVMOp; concrete ops such as ConvOp are generated:
      class XCVMOp {
      public:
          explicit XCVMOp(const XCInstructionProto& inst);
          virtual void Run(XCVMState* state) = 0;

      protected:
          XCInstructionProto inst_;
          const int64_t id_;
          const XCInstructionProto::Op op_;
          const std::string name_;
      };
      compiler/xcvm/xcvm_op.h

      class ConvOp : public XCVMOp {
      public:
          explicit ConvOp(const XCInstructionProto& inst);
          chainerx::Array RunImpl(XCVMState* st, const chainerx::Array& x, const chainerx::Array& w, const nonstd::optional<chainerx::Array>& b);
          virtual void Run(XCVMState* st);

      private:
          int x;
          int w;
          int b;
          chainerx::StackVector<int64_t, chainerx::kMaxNdim> strides;
          chainerx::StackVector<int64_t, chainerx::kMaxNdim> pads;
          int y;
      };
      build/runtime/gen_xcvm_ops.h
  30. The generated Run unpacks register indices from the instruction and dispatches to RunImpl, which calls chainerx::Conv (build/runtime/gen_xcvm_ops.cc):
      void ConvOp::Run(XCVMState* st) {
          /* ... */
          st->SetArray(y, RunImpl(st, st->GetArray(x), st->GetArray(w), st->GetOptionalArray(b)));
          /* ... */
      }

      chainerx::Array ConvOp::RunImpl(
              XCVMState* st, const chainerx::Array& x, const chainerx::Array& w,
              const nonstd::optional<chainerx::Array>& b) {
          return chainerx::Conv(x, w, b, ComplementStride(strides, x), ComplementPad(pads, x));
      }
