Cilk Plus: Paralelismo easy level em C

1,174 views

Published on

Palestra apresentada no TDC Sampa 2013. Códigos de exemplo e referências: https://github.com/fsouza/tdc_cilk_plus

Published in: Technology
  • Be the first to comment

  • Be the first to like this

Cilk Plus: Paralelismo easy level em C

  1. 1. globo .com Cilk Plus Paralelismo easy level em C Saturday, July 13, 13
  2. 2. Por que? Saturday, July 13, 13
  3. 3. The free lunch is over Saturday, July 13, 13
  4. 4. “Most classes of applications have enjoyed free and regular performance gains for several decades, even without releasing new versions or doing anything special...” - Herb Sutter, 2005 Saturday, July 13, 13
  5. 5. Lei de Moore Saturday, July 13, 13
  6. 6. Saturday, July 13, 13
  7. 7. Como? Saturday, July 13, 13
  8. 8. Saturday, July 13, 13
  9. 9. Fork-Join Message passing SIMD Saturday, July 13, 13
  10. 10. Fork-Join Message passing SIMD Saturday, July 13, 13
  11. 11. https://secure.flickr.com/photos/paulscott56/7771883174/ dafuq is Cilk Plus?! Saturday, July 13, 13
  12. 12. Cilk Plus ‣ Fruto de 15+ anos de trabalho no MIT ‣ Extensão da linguagem C ‣ Especificação aberta ‣ Comercializado pela Intel ‣ Disponível em compiladores open source Saturday, July 13, 13
  13. 13. Cilk++ Saturday, July 13, 13
  14. 14. Cilk++ Saturday, July 13, 13
  15. 15. Saturday, July 13, 13
  16. 16. https://secure.flickr.com/photos/rainiernavidad/2679301542/ Saturday, July 13, 13
  17. 17. 1 Task parallelism Saturday, July 13, 13
  18. 18. Palavras chave ‣ cilk_spawn ‣ cilk_sync ‣ cilk_for Saturday, July 13, 13
  19. 19. spawn + sync int fib(int  n) {    if(n  <  2)  {        return  n;    }    int  x,  y;    x  =  cilk_spawn  fib(n-1);    y  =  fib(n-2);    cilk_sync;    return  x  +  y; } Saturday, July 13, 13
  20. 20. spawn + sync int fib(int  n) {    if(n  <  2)  {        return  n;    }    int  x,  y;    x  =  cilk_spawn  fib(n-1);    y  =  fib(n-2);    cilk_sync;    return  x  +  y; }            cilk_spawn    cilk_sync; Saturday, July 13, 13
  21. 21. spawn + sync int  i; for(i  =  0;  i  <  N;  i++)  {   cilk_spawn  do_something(i); } cilk_sync; Saturday, July 13, 13
  22. 22. cilk_for int  i; cilk_for(i  =  0;  i  <  N;  i++)  {   do_something(i); } Saturday, July 13, 13
  23. 23. Reducers long  result  =  0; cilk_for(i  =  0;  i  <  N;  i++)  {   result  +=  values[i]  *  2; } printf("%ld\n",  result); Saturday, July 13, 13
  24. 24. Reducers long  result  =  0; cilk_for(i  =  0;  i  <  N;  i++)  {   result  +=  values[i]  *  2; } printf("%ld\n",  result); %  ./double_sum 140735583143608 %  ./double_sum 140735391864504 %  ./double_sum 140735461627576 %  ./double_sum 140735348451000 Saturday, July 13, 13
  25. 25. Reducers long  result  =  0; cilk_for(i  =  0;  i  <  N;  i++)  {     result  +=  values[i]  *  2; } printf("%ld\n",  result); Saturday, July 13, 13
  26. 26. Reducers long  result  =  0; cilk_for(i  =  0;  i  <  N;  i++)  {     mtx_lock(&m);   result  +=  values[i]  *  2;   mtx_unlock(&m); } printf("%ld\n",  result); Saturday, July 13, 13
  27. 27. https://secure.flickr.com/photos/jenko2k5/7165647503/ #FAIL Saturday, July 13, 13
  28. 28. Reducers CILK_C_REDUCER_OPADD(result,  long,  0); cilk_for(i  =  0;  i  <  N;  i++)  {   REDUCER_VIEW(result)  +=  values[i]  *  2; } printf("%ld\n",  result.value); Saturday, July 13, 13
  29. 29. Reducers cilk::reducer_opadd<long>  result; cilk_for(int  i  =  0;  i  <  N;  i++)  {   result  +=  values[i]  *  2; } printf("%ld\n",  result.get_value()); Saturday, July 13, 13
  30. 30. Reducers disponíveis ‣ reducer_max ‣ reducer_max_index ‣ reducer_min ‣ reducer_min_index ‣ reducer_opand ‣ reducer_opor ‣ reducer_opxor Saturday, July 13, 13
  31. 31. 2 Data parallelism Saturday, July 13, 13
  32. 32. Thread parallelism Saturday, July 13, 13
  33. 33. Vector Parallelism Saturday, July 13, 13
  34. 34. Vector Parallelism void multiply(int  *a,  int  *b,  int  *c,  int  n) {   int  i;   for(i  =  0;  i  <  n;  i++)  {     c[i]  =  a[i]  *  b[i];   } } Saturday, July 13, 13
  35. 35. Vector Parallelism void multiply(int  *a,  int  *b,  int  *c,  int  n) {   int  i;   __m128i  *pa,  *pb,  pc;   for(i  =  0;  i  <  n;  i  +=  4)  {     pa  =  (__m128i  *)&a[i];     pb  =  (__m128i  *)&b[i];     pc  =  _mm_mullo_epi32(*pa,  *pb);     memcpy(&c[i],  &pc,  4*sizeof(int));   } } Saturday, July 13, 13
  36. 36. Vector Parallelism void multiply(int  *a,  int  *b,  int  *c,  int  n) {   int  i;   for(i  =  0;  i  <  n;  i++)  {     c[i]  =  a[i]  *  b[i];   } } Saturday, July 13, 13
  37. 37. Vector Parallelism void multiply(int  *a,  int  *b,  int  *c,  int  n) {   int  i;   for(i  =  0;  i  <  n;  i++)  {     c[i]  =  a[i]  *  b[i];   } } #pragma  simd Saturday, July 13, 13
  38. 38. #pragma simd long  result  =  0; #pragma  simd  reduction(+:result) for(i  =  0;  i  <  N;  i++)  {   result  +=  values[i]  *  2; } Saturday, July 13, 13
  39. 39. #pragma simd ‣ private ‣ firstprivate ‣ lastprivate ‣ vectorlength ‣ vectorlengthfor ‣ linear Saturday, July 13, 13
  40. 40. #pragma simd void multiply(int  *a,  int  *b,  int  *c,  int  n) {   int  i; #pragma  simd   for(i  =  0;  i  <  n;  i++)  {     c[i]  =  multi(a[i],  b[i]);   } } Saturday, July 13, 13
  41. 41. #pragma simd void multiply(int  *a,  int  *b,  int  *c,  int  n) {   int  i; #pragma  simd   for(i  =  0;  i  <  n;  i++)  {     c[i]  =  multi(a[i],  b[i]);   } } Saturday, July 13, 13
  42. 42. Elemental functions __declspec(vector)  int multi(int  x,  int  y) {   return  x  *  y; } Saturday, July 13, 13
  43. 43. Fortran Saturday, July 13, 13
  44. 44. SAXPY subroutine  saxpy(x,  y,  a,  n)      integer,  intent(in)  ::  n      real(kind=4),  dimension(n),  intent(in)  ::  x      real(kind=4),  dimension(n),  intent(inout)  ::  y      real(kind=4),  intent(in)  ::  a      y  =  y  +  a*x end  subroutine Saturday, July 13, 13
  45. 45. SAXPY subroutine  saxpy(x,  y,  a,  n)      integer,  intent(in)  ::  n      real(kind=4),  dimension(n),  intent(in)  ::  x      real(kind=4),  dimension(n),  intent(inout)  ::  y      real(kind=4),  intent(in)  ::  a      y  =  y  +  a*x end  subroutine Saturday, July 13, 13
  46. 46. Array Notation void saxpy(float  *x,  float  *y,  float  a,  int  n) {   y[0:n]  +=  a  *  x[0:n]; } Saturday, July 13, 13
  47. 47. Array Notation float multiply(float  a,  float  b) {   return  a  *  b; } void saxpy(float  *x,  float  *y,  float  a,   int  n) {   y[0:n]  +=  multiply(a,  x[0:n]); } Saturday, July 13, 13
  48. 48. Array Notation array[first:length:stride] Saturday, July 13, 13
  49. 49. Array Notation array[first:length:stride]length Saturday, July 13, 13
  50. 50. Qual usar? Saturday, July 13, 13
  51. 51. Qual usar? %  clang  -Wall  -o  pragma  pragma.c pragma.c:7:9:  warning:  unknown  pragma  ignored  [-Wunknown-pragmas] #pragma  simd                ^ 1  warning  generated. Saturday, July 13, 13
  52. 52. Qual usar? %  clang  -Wall  -o  pragma  pragma.c pragma.c:7:9:  warning:  unknown  pragma  ignored  [-Wunknown-pragmas] #pragma  simd                ^ 1  warning  generated. %  clang  -o  array  array.c array.c:6:5:  error:  expected  ']'                c[0:n]  =  a[0:n]  *  b[0:n];                      ^ array.c:6:3:  note:  to  match  this  '['                c[0:n]  =  a[0:n]  *  b[0:n];                  ^ array.c:6:14:  error:  expected  ']'                c[0:n]  =  a[0:n]  *  b[0:n];                                        ^ array.c:6:12:  note:  to  match  this  '['                c[0:n]  =  a[0:n]  *  b[0:n];                                    ^ array.c:6:23:  error:  expected  ']'                c[0:n]  =  a[0:n]  *  b[0:n];                                                          ^ array.c:6:21:  note:  to  match  this  '['                c[0:n]  =  a[0:n]  *  b[0:n];                                                      ^ 3  errors  generated. Saturday, July 13, 13
  53. 53. Cilk Plus SDK Saturday, July 13, 13
  54. 54. Ferramentas ‣ Intel Cilk Screen race detector ‣ Intel CilkView scalability analyzer Saturday, July 13, 13
  55. 55. Compiladores Saturday, July 13, 13
  56. 56. icc ‣ Suporte completo, nativo. ‣ http://software.intel.com/en-us/intel-cilk-plus %  icc  -o  source  source.c Saturday, July 13, 13
  57. 57. gcc ‣ Suporte completo em uma branch ‣ [WIP] merge com o trunk para GCC 4.9 ‣ http://gcc.gnu.org/wiki/cilkplus-merge %  gcc  -o  source  -lcilkrts  -ldl  source.c Saturday, July 13, 13
  58. 58. clang ‣ WIP ‣ Suporte parcial ‣ cilk_spawn ‣ cilk_sync ‣ cilk_for ‣ http://cilkplus.github.com/ %  clang  -o  source  -fcilkplus  source.c Saturday, July 13, 13
  59. 59. Links ‣ http://cilkplus.org ‣ http://software.intel.com/en-us/intel-cilk-plus ‣ http://cilkplus.github.com ‣ http://gcc.gnu.org/wiki/cilkplus-merge Saturday, July 13, 13
  60. 60. Cilk Plus Paralelismo easy level em C Francisco Souza fss@corp.globo.com @franciscosouza slideshare.net/franciscosouza globo .com Saturday, July 13, 13

×