Successfully reported this slideshow.
We use your LinkedIn profile and activity data to personalize ads and to show you more relevant ads. You can change your ad preferences anytime.

Prelude to halide_public

1,023 views

Published on

Halide勉強会 @フィックスターズの資料です

Published in: Software
  • Be the first to comment

Prelude to halide_public

  1. 1. . 1. 8 1 21 10 8 1 21 10
  2. 2. è t : : è ln a F è pu ke i – rsx • Fg o h V P LN • .. da k N LP LN – I • 1 S ke iG )0)2 2H v LP M E A ( A 8A C
  3. 3. è 21 . F è 21 . 21 . CA è 2 0. 2 .2 F è 21 . CA 2 0. 2 .2 è 1 8 81 22 1 08 . . .
  4. 4. 3 23 02102 0 .. 2 3 2 3 2
  5. 5. è Vlo s / – DLLI, D FEA F GC H C è – . / W – re hn gb pR sa W • re hn • i sa U gb pR sa – iR xU PdRlv S • iR , N+ -:6 8 0: 64 4/4- 2 2 N CHG • t , 6 4: . . 8I G. 8I G2 6 LF 6 L F ) O ( + 1EN L .H IH LEHG -FF ECDL M A [1] J.Ragan-Kelley, et al, Halide: A Language and Compiler for Optimizing Parallelism, Locality, and Recomputation in Image Processing Pipelines, PLDI 2013
  6. 6. ) ) ) ( ( è iL a v lD – x A – iC H • a n e • gdLr s h iC H • FLo L è 10 D a l – t – 5202. L p 0 5 5 5 0 2 11 50. 5 58
  7. 7. èd a è a S – c – 0 6 C e – F – A 8 6 2 2 6 21 00 .6
  8. 8. 2 7 2 02102 0 .. 2 2 2 Func conv3x3(Func in) { Func f; Var x, y; RDom r(-1, 3, -1, 3); Func clamped = BoundaryConditions::constant_exterior(in, 0); f(x, y) = sum(clamped(x+r.x, y+r.y)); return f; }
  9. 9. 2 2 12 10 1 . 1 2 1 2 1 Func conv3x3(Func in) { Func f; Var x, y; RDom r(-1, 3, -1, 3); Func clamped = BoundaryConditions::constant_exterior(in, 0); f(x, y) = sum(clamped(x+r.x, y+r.y)); return f; } 8
  10. 10. 2 ) ( ,2 89- 8 -92 33 20198 8 . Func conv3x3(Func in) { Func f; Var x, y; RDom r(-1, 3, -1, 3); Func clamped = BoundaryConditions::constant_exterior(in, 0); f(x, y) = sum(clamped(x+r.x, y+r.y)); return f; } F C A
  11. 11. 2 8 (. ) 1 21 ) .10 . Func conv3x3(Func in) { Func f; Var x, y; RDom r(-1, 3, -1, 3); Func clamped = BoundaryConditions::constant_exterior(in, 0); f(x, y) = sum(clamped(x+r.x, y+r.y)); return f; } F C A 2) .0
  12. 12. 2 2 02102 0 .. 2 2 2 Func conv3x3(Func in) { Func f; Var x, y; RDom r(-1, 3, -1, 3); Func clamped = BoundaryConditions::constant_exterior(in, 0); f(x, y) = sum(clamped(x+r.x, y+r.y)); return f; } A 8 C
  13. 13. 1 è c M [ sc – A • v dhon r sgt Ra – • Iuhr • d Ie p mIpe iI è n mx ic T ]y l T – cS Pa ] – n mx ic S Pa T H F C A ) A A C ( A C A8 8A 8 0 2 . 8C ( C C 6 G 16 8 , 8 8 A 68 8 8 (). 1 0( ,
  14. 14. 3 23 02102 0 .. 2 3 2 3 2 Func blur_x, blur_y; Var x, y; blur_x(x, y) = in(x, y) + in(x+1, y); blur_y(x, y) = (blur_x(x, y) + blur_x(x, y+1)) / 4; for (int y=0; y<height; y++) { for (int x=0; x<width; x++) { blur_x[y][x] = in[y][x] + in[y][x+1]; } } for (int y=0; y<height; y++) { for (int x=0; x<width; x++) { blur_y[y][x] = (blur_x[y][x] + blur_x[y+1][x]) / 4; } } for (int y=0; y<height; y++) { for (int x=0; x<width; x++) { blur_x[0][x] = in[y][x] + in[y][x+1]; blur_x[1][x] = in[y+1][x] + in[y+1][x+1]; } for (int x=0; x<width; x++) { blur_y[y][x] = (blur_x[0][x] + blur_x[1][x]) / 4; } } for (int y=0; y<height; y++) { for (int x=0; x<width; x++) { blur_y[y][x] = (in[y][x] + in[y][x+1] + in[y+1][x] + in[y+1][x+1]) / 4; } }8
  15. 15. 0 4 0 2 11 0. 8 Func box_filter_3x3(Func in) { Func blurx, blury; Var x, y; blurx(x, y) = (in(x-1, y) + in(x, y) + in(x+1, y))/3; blury(x, y) = (blurx(x, y-1) + blurx(x, y) + blurx(x, y+1))/3; if (get_target().has_gpu_feature()) { blury.gpu_tile(x, y, xi, yi, 32, 8); blurx.compute_at(blury, x); } else { blury.tile(x, y, xi, yi, 256, 32).vectorize(xi, 8).parallel(y); blurx.compute_at(blury, x).store_at(blury, x).vectorize(x, 8); } return blury; } F ltd ltd è oC rltAgveC – C xhA PU G èP sp gve ltai – P Cgve n ltd
  16. 16. è1A )5 – 00 IaO VU N 8 o pn è 5/ C ) A C C – ghN c m I elNo è 2 G – .2 . 2 CC D P C FM 8 N idT I H GCD C ) A D ( DC C 8
  17. 17. è – 1 68 8 2-.81 7. . /2 8 78 1 2-. 2 8 78 76 7 2 7201 8 7.8.7:.-
  18. 18. 7 2 02102 0 .. 2 2 2
  19. 19. è) 0 a d C eg èov – s hrF eg – FH i • A a FH plA – tn ( 8 2 2 21 00 .
  20. 20. 8 98 8 2 1 1 10 f(x, y) = in(x, y) + 1; produce f$1 { let f.y.loop_max = f.y.max let f.y.loop_min = f.y.min let f.y.loop_extent = ((f.y.max + 1) - f.y.min) let f.x.loop_max = f.x.max let f.x.loop_min = f.x.min let f.x.loop_extent = ((f.x.max + 1) - f.x.min) for (f.y, f.y.loop_min, f.y.loop_extent) { for (f.x, f.x.loop_min, f.x.loop_extent) { f(f.x, f.y) = in(f.x, f.y) + 1 } } } 28 9 1 8 1 10 01 .
  21. 21. M & FG EF )CEDCE G CB E GF E F EI . 00 1 ) FH8F GF 0C E B (CHB F B E B GG B B 2 CDG A L G CB 0CCD 2 EG G CB ) )C A B G CB . 2EC E A 2 FF BEC B GCE L G CB
  22. 22. èIRVisitor IRGraphVisitor – R a 0 0 • rt a • IRGraphVisitor d e V C a – n ol s C i V èIRMutator – C rt FV 0 0 – vh V A i V è p VA I gx V 0 0 2 11 0./ 8
  23. 23. è A8 C 2 12 10 1 . 1 2 1 2 1 class PrintLoops : public IRVisitor { using IRVisitor::visit; void visit(const For *op) { std::cout << op->name << std::endl; IRVisitor::visit(op); } }; void print_loops(Stmt s) { PrintLoops v; s.accept(v); }
  24. 24. èF AI 0 3 3 032 11 0. 8 class PrintLoops : public IRVisitor { using IRVisitor::visit; void visit(const For *op) { std::cout << op->name << std::endl; IRVisitor::visit(op); } }; void print_loops(Stmt s) { PrintLoops v; s.accept(v); } 0 0 3 CA CA
  25. 25. è 8 4 2 02102 4 0 .. 2 4 2 2 class PrintLoops : public IRVisitor { using IRVisitor::visit; void visit(const For *op) { std::cout << op->name << std::endl; IRVisitor::visit(op); } }; void print_loops(Stmt s) { PrintLoops v; s.accept(v); } 02C8 4 A8 8F
  26. 26. è F 8 . 5 1 21 5.10 . 5 class PrintLoops : public IRVisitor { using IRVisitor::visit; void visit(const For *op) { std::cout << op->name << std::endl; IRVisitor::visit(op); } }; void print_loops(Stmt s) { PrintLoops v; s.accept(v); } 1 F C A F
  27. 27. è F SCL A 0 8 6 6 6 80 11 60. 8 6 6 class PrintLoops : public IRVisitor { using IRVisitor::visit; void visit(const For *op) { std::cout << op->name << std::endl; IRVisitor::visit(op); } }; void print_loops(Stmt s) { PrintLoops v; s.accept(v); } 828C 60 8 A C P
  28. 28. 7 2 02102 0 .. 2 2 2
  29. 29. è o A A è hAg C n n e adli – C A Ag – n – n A 8 A F – n A A 2 12 10 1 . 1 2 1 2 1
  30. 30. ) )( ) è bnky ) )( ) – p • p • p • p s p PC • p dg PC – p x p t S • 1 8 1 . 1 S – l o aedv 01 i r P – t crh pv F M è +8 10 801 , 8+ u A 8 98 8 2 1 1 10
  31. 31. 8 Ss 8 . 1 1 c A vu S – • 1 . 48 1 8 S n – • 1 . 48 1 8 c . 1 1 ha S n – • V d V S n vu l t S P i IC Sx a S 310 54 2 48 c i or mpSiC A e F g S P 4 . 8 8 . 48 55 423 1 1 10
  32. 32. è 1 8 0 3 ed è 1 : ! F 1 8 :18 "# I GP – ! 1 : – $% 1 8 0 3 • D a VS – &% 21 .1 1: • D ed D a bc C 8 1 ) 1 8 ( 8 A "# = $ D# ×($# , &# , 1). ≥ 0}
  33. 33. è AC – 1 8 1 8 • !"# = (&, () – 8. 1 1 • *"# = (+, ,) – 1 8 1 8 P • 0 ≤ & < + (⇒ 0 ≤ & ≤ + − 1) • 0 ≤ ( < , (⇒ 0 ≤ ( ≤ , − 1) 8 8 8 23 1 1 10 G I F for (y=0; y<H; y++) for (x=0; x<W; x++) { S1: s = 0; //S1 for (ky=0; ky<KS; ky++) for (kx=0; kx<KS; kx++) S2: s += src[y+ky][x+kx] * kernel[ky][kx]; S3: dst[y][x] = s >> t; }
  34. 34. è AC – 1 8 1 8 • !"# = (&, (, )&, )() – 8. 1 1 • +"# = (,, -, ./) – 1 8 1 8 P • 0 ≤ & < , (⇒ 0 ≤ & ≤ , − 1) • 0 ≤ ( < - (⇒ 0 ≤ ( ≤ - − 1) • 0 ≤ )& < ., (⇒ 0 ≤ )& ≤ ., − 1) • 0 ≤ )( < .- (⇒ 0 ≤ )( ≤ .- − 1) 8 8 8 23 1 1 10 G I F for (y=0; y<H; y++) for (x=0; x<W; x++) { S1: s = 0; //S1 for (ky=0; ky<KS; ky++) for (kx=0; kx<KS; kx++) S2: s += src[y+ky][x+kx] * kernel[ky][kx]; S3: dst[y][x] = s >> t; }
  35. 35. è 2 C A D"# C è D"$ C 8 . 4 34 13213 .10 3. 4 3 4 3 %"# = { (, * | 1 0 0 0 0 −1 0 1 0 −1 0 1 0 0 0 0 −1 0 1 −1 ( * H W 1 ≥ 2} %"$ = (, *, 4(, 4* 1 0 0 0 0 0 0 0 −1 0 1 0 0 0 0 −1 0 1 0 0 0 0 0 0 0 −1 0 1 0 0 0 −1 0 0 1 0 0 0 0 0 0 0 −1 0 0 0 1 −1 0 0 0 1 0 0 0 0 0 0 0 −1 0 0 0 −1 ( * 4( 4* H W KS 1 ≥ 2} %"# F for (y=0; y<H; y++) for (x=0; x<W; x++) { S1: s = 0; //S1 for (ky=0; ky<KS; ky++) for (kx=0; kx<KS; kx++) S2: s += src[y+ky][x+kx] * kernel[ky][kx]; S3: dst[y][x] = s >> t; } O W-1 H-1 y x
  36. 36. è 050 F VC è 050 ! 0 3 ,0. "# A .20 3 1 . 3 $# ("# ) F S è$#' "#' ≪ $#) "#) ⇒ (!+, "#') (!-, "#)) I – (≪: A ) 3 8 3 312 0 0 0 $# ("# ) = Θ# ×("# , 2, 1)4
  37. 37. è F d 6 e è . e N F 6 a – S F g S è T S P AC 2 8 2 6 201 . 3 3 for (y=0; y<H; y++) for (x=0; x<W; x++) { S1: s = 0; //S1 for (ky=0; ky<KS; ky++) for (kx=0; kx<KS; kx++) S2: s += src[y+ky][x+kx] * kernel[ky][kx]; S3: dst[y][x] = s >> t; } !"# $, & = 0, $, 0, &, 0 ) !"* $, &, +$, +& = 0, $, 0, &, 1, +$, 0, +&, 0 ) !"- $, & = 0, $, 0, &, 2 ) T S P T S PF
  38. 38. è3 CFΘ A 85 5 2 32 21 00 . 5 5 7 "#$ %, ' = 0, %, 1, ', 0 + = 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 % ' H W 1 "#. /01 = 0, %, 0, ', 1, 2%, 0, 2' + = 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 % ' 2% 2' H W KS 1
  39. 39. è 0 0 FA IS 3 0 è 0 0 ! 0 3 ,0. "# FA I 0 ..0 . 3 $("# ) M C I 3 8 3 312 0 0 0 $("# ) = F×("# , +# , 1)-
  40. 40. 8 . 3 13213 .10 3. 3 3 !"#$%& ', ), *', *) = ' + *', ) + *) - = 1 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 ' ) *' *) H W KS 1 !"45$6 ', ) = ', ) - = 1 0 0 0 0 0 1 0 0 0 ' ) H W 1 9 for (y=0; y<H; y++) for (x=0; x<W; x++) { S1: s = 0; //S1 for (ky=0; ky<KS; ky++) for (kx=0; kx<KS; kx++) S2: s += src[y+ky][x+kx] * kernel[ky][kx]; S3: dst[y][x] = s >> t; }
  41. 41. è 8102. 82 0 2 AF C AF è S 8102. 82 0 2 !"# $" – % • 0: 4 0 2 11 0. !"# $" = %×!"($")
  42. 42. ) ( 4 2 11 . 8 !"# $, & = 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 $ & N M 1 !"#, $, & = 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 0 $ & N M 1 for (i=0; i<N; i++) for (j=0; j<M; j++) S1(i, j); for (j=0; j<M; j++) for (i=0; i<N; i++) S1(i, j); . = 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 × F A 0 C
  43. 43. è 2 28 D CVihFh I 2 8 .20 e S I – e F 2 gc è 2 28 ! " 2 281280 , 421 #$%& P dL a A 8 4 2 2 21 #$%& = ()*, ),) .$ .& /$%& ×()$, )&, 1$, 1&, 1)& ≥ ≥ = 4}
  44. 44. C 1 . 48 8 .4 F D – C 1 . 48 8 .4 4 . 8 8 . 48 423 1 1 10 1 1 01 8 310 . for (i=0; i<N; i++) { S: s[i] = 0; for (j=0; j<M; j++) { T: s[i] = s[i] + a[i][j] * x[j]; } } AI !"#$ = 1 0 0 0 0 0 −1 0 0 1 0 0 0 1 0 0 0 0 0 −1 0 1 0 0 0 0 1 0 0 0 0 0 −1 0 1 0 1 −1 0 0 0 0 )" )$ *$ + M 1 ≥ ≥ ≥ ≥ ≥ ≥ = 0
  45. 45. A 0 CD – 0 – 0 8 44 12 0 0 0 080 0 . 4 20 for (i=0; i<N; i++) { S: s[i] = 0; for (j=0; j<M; j++) { T: s[i] = s[i] + a[i][j] * x[j]; } } I F !"#$ = 1 0 0 0 0 0 −1 0 0 1 0 0 0 1 0 0 0 0 0 −1 0 1 0 0 0 0 1 0 0 0 0 0 −1 0 1 0 1 −1 0 0 0 0 )" )$ *$ + M 1 ≥ ≥ ≥ ≥ ≥ ≥ = 0
  46. 46. D. 2 5 8 58 F – 2 5 8 58 – . 2 5 8 58 D. 2 A 002 80 5 8 S P – !: #$ = #& 5 5 8 5 4 2 2 21 '$(& = 1 0 0 0 0 0 −1 0 0 1 0 0 0 1 0 0 0 0 0 −1 0 1 0 0 0 0 1 0 0 0 0 0 −1 0 1 0 1 −1 0 0 0 0 #$ #& ,& - M 1 ≥ ≥ ≥ ≥ ≥ ≥ = 0 for (i=0; i<N; i++) { S: s[i] = 0; for (j=0; j<M; j++) { T: s[i] = s[i] + a[i][j] * x[j]; } } MIC 2 281280A A421
  47. 47. 0 èP 6 4. 4 1 . !"#$ %"#$ DC & L 0 F – D & L 0 F A 2 8 8 6 2 4 201 8 8 . %"#$ = &(" )" − &($ )$ ≫ 0 !"#$ = 1 0 0 0 0 0 −1 0 0 1 0 0 0 1 0 0 0 0 0 −1 0 1 0 0 0 0 1 0 0 0 0 0 −1 0 1 0 1 −1 0 0 0 0 ." .$ /$ 0 M 1 ≥ ≥ ≥ ≥ ≥ ≥ = 0
  48. 48. 8 .74 4 1 21 .10 . 4 4
  49. 49. è yit – 0 F8 2 / ro sm • - 2 2 • 148 C C 4 • / F 44 AA C 4 – x eHagl kc dHe • vb e uM kph – -0uM . 2 F P S y n L I è H – 8 A 8C 4 2: 2 C 2 F8 2 A 2 A 2 8 A A D
  50. 50. èF I A – 0 8 4 20/9.4 /04 HC è . 4 .9 98 9. 44 9 12 90 09 0/ Func matmul(Func a, Func b, int size) { Func c; Var i, j; RDom k(0, size); c(i, j) = 0; c(i, j) += a(k, j) * b(i, k); return c; } for (c$3.s0.j, c$3.s0.j.loop_min, c$3.s0.j.loop_extent) { for (c$3.s0.i, c$3.s0.i.loop_min, c$3.s0.i.loop_extent) { c$3(c$3.s0.i, c$3.s0.j) = 0 } } for (c$3.s1.j, c$3.s1.j.loop_min, c$3.s1.j.loop_extent) { for (c$3.s1.i, c$3.s1.i.loop_min, c$3.s1.i.loop_extent) { for (c$3.s1.r78$x, 0, 100) { c$3(c$3.s1.i, c$3.s1.j) = (c$3(c$3.s1.i, c$3.s1.j) + (a$3(c$3.s1.r78$x, c$3.s1.j)*b$3(c$3.s1.i, c$3.s1.r78$x))) } } } .4 /0 .4 /0
  51. 51. 2 02102 0 .. 2 2 25 Building polyhedral models... Iteration Sets := (c$3.s0.j, c$3.s0.i) Domain := [c$3.s0.j.loop_min, ((c$3.s0.j.loop_min + c$3.s0.j.loop_extent) + -1)], [c$3.s0.i.loop_min, ((c$3.s0.i.loop_min + c$3.s0.i.loop_extent) + -1)] Schedule := (2, c$3.s0.j, 0, c$3.s0.i, 0) Provides := c$3 := (c$3.s0.i, c$3.s0.j) : (c$3.s0.i, c$3.s0.j) Iteration Sets := (c$3.s1.j, c$3.s1.i, c$3.s1.r78$x) Domain := [c$3.s1.j.loop_min, ((c$3.s1.j.loop_min + c$3.s1.j.loop_extent) + -1)], [c$3.s1.i.loop_min, ((c$3.s1.i.loop_min + c$3.s1.i.loop_extent) + -1)], [0, 99] Schedule := (3, c$3.s1.j, 0, c$3.s1.i, 0, c$3.s1.r78$x, 0) Provides := c$3 := (c$3.s1.i, c$3.s1.j) : (c$3.s1.i, c$3.s1.j) Calls := c$3 := (c$3.s1.i, c$3.s1.j) : (c$3.s1.i, c$3.s1.j) a$3 := (c$3.s1.r78$x, c$3.s1.j) : (c$3.s1.r78$x, c$3.s1.j) b$3 := (c$3.s1.i, c$3.s1.r78$x) : (c$3.s1.i, c$3.s1.r78$x)
  52. 52. 0 5 5 5 0 2 11 50. 5 58 for (c$3.s0.j, c$3.s0.j.loop_min, c$3.s0.j.loop_extent) { for (c$3.s0.i, c$3.s0.i.loop_min, c$3.s0.i.loop_extent) { c$3(c$3.s0.i, c$3.s0.j) = 0 } } for (c$3.s1.j, c$3.s1.j.loop_min, c$3.s1.j.loop_extent) { for (c$3.s1.i, c$3.s1.i.loop_min, c$3.s1.i.loop_extent) { for (c$3.s1.r78$x, 0, 100) { c$3(c$3.s1.i, c$3.s1.j) = (c$3(c$3.s1.i, c$3.s1.j) + (a$3(c$3.s1.r78$x, c$3.s1.j)*b$3(c$3.s1.i, c$3.s1.r78$x))) } } } parallel (c$3.s0.j, c$3.s0.j.loop_min, c$3.s0.j.loop_extent) { for (c$3.s0.i, c$3.s0.i.loop_min, c$3.s0.i.loop_extent) { c$3(c$3.s0.i, c$3.s0.j) = 0 } } parallel (c$3.s1.j, c$3.s1.j.loop_min, c$3.s1.j.loop_extent) { for (c$3.s1.i, c$3.s1.i.loop_min, c$3.s1.i.loop_extent) { for (c$3.s1.r78$x, 0, 100) { c$3(c$3.s1.i, c$3.s1.j) = (c$3(c$3.s1.i, c$3.s1.j) + (a$3(c$3.s1.r78$x, c$3.s1.j)*b$3(c$3.s1.i, c$3.s1.r78$x))) } } } 10 10
  53. 53. è 2 5 2 2 8 8 5 5 25 201 8 8 . Building polyhedral models... Iteration Sets := (f.s1.r4$x) Domain := [2, 99] Schedule := (1, f.s1.r4$x, 0) Provides := f := (f.s1.r4$x) : (f.s1.r4$x) Calls := f := ((f.s1.r4$x + -2)) : (f.s1.r4$x) f := ((f.s1.r4$x + -1)) : (f.s1.r4$x) Flow: f(f.s1.r4$x) -> f((f.s1.r4$x + -2)) : (=, -, =) Flow: f(f.s1.r4$x) -> f((f.s1.r4$x + -1)) : (=, -, =) f(x) = x; f(r.x) = f(r.x-2) + f(r.x-1); for (f.s0.x, f.s0.x.loop_min, f.s0.x.loop_extent) { f(f.s0.x) = f.s0.x } for (f.s1.r4$x, 2, 98) { f(f.s1.r4$x) = (f((f.s1.r4$x + -2)) + f((f.s1.r4$x + -1))) } 2. 2. FAC
  54. 54. 8 . 5 3 13213 5.10 3. 5 3 3
  55. 55. è 5401 – odHag il n D . – r L rAp – H rA v R S Ia è 5401 – Rtxs ehC a F eh – 4 8 4 55 42 1 1 10

×