SlideShare a Scribd company logo
3,100,000
Transistors
1.4 Billion
Transistors
Ivy Bridge
1971, Intel, 4004
4bits processor
740Khz(8 clock
cycle/instruction cycle)
2012, Intel, 80637
(Ivy Bridge)
64bits processor
4.1Ghz
How(Old Features)
Multithread
Programming
OpenMP PPL / TPL
How(VS2012 New Features)
Auto-
Vectorization
Auto-
Parallelization
C++ AMP
CPU
images source: AMD
performance
portability
productivity
void AddArrays(int n, int * pA, int * pB, int * pC)
{
for (int i=0; i<n; i++)
{
pC[i] = pA[i] + pB[i];
}
}
void AddArrays(int n, int * pA, int * pB, int * pC)
{
for (int i=0; i<n; i++)
{
pC[i] = pA[i] + pB[i];
}
}
#include <amp.h>
using namespace concurrency;
void AddArrays(int n, int * pA, int * pB, int * pC)
{
array_view<int,1> a(n, pA);
array_view<int,1> b(n, pB);
array_view<int,1> sum(n, pC);
parallel_for_each(
sum.extent,
[=](index<1> i) restrict(amp)
{
sum[i] = a[i] + b[i];
}
);
}
void AddArrays(int n, int * pA, int * pB, int * pC)
{
array_view<int,1> a(n, pA);
array_view<int,1> b(n, pB);
array_view<int,1> sum(n, pC);
parallel_for_each(
sum.extent,
[=](index<1> i) restrict(amp)
{
sum[i] = a[i] + b[i];
}
);
}
parallel_for_each:
execute the lambda on
the accelerator once per
thread
extent: the number
and shape of threads to
execute the lambda
index: the thread ID
that is running the
lambda, used to index
into data
restrict(amp): tells the compiler to
check that this code conforms to C++
AMP language restriction
array_view: wrap the data to operate
on the accelerator
array_view variables captured and associated
data copied to accelerator(on demand)
index<1> i(2); index<3> i(2,0,1);
extent<3> e(3,2,2);
index<2> i(0,2);
extent<2> e(3,4);extent<1> e(6);
N <= 128
vector<int> v(10);
extent<2> e(2,5);
array_view<int,2> a(e, v);
//above two lines can also be written
//array_view<int,2> a(2,5,v);
index<2> i(1,3);
int o = a[i]; // or a[i] = 16;
//or int o = a(1, 3);
1. parallel_for_each(
2. e, // e is of type extent<N>
3. [ ](index<N> idx) restrict(amp)
{
// kernel code
}
1. );
• No
• recursion
• 'volatile'
• virtual functions
• pointers to functions
• pointers to member functions
• pointers in structs
• pointers to pointers
• bitfields
• No
• goto or labeled statements
• throw, try, catch
• globals or statics
• dynamic_cast or typeid
• asm declarations
• varargs
• unsupported types
• e.g. char, short, long double
double cos( double d ); // 1a: cpu code
double cos( double d ) restrict(amp); // 1b: amp code
double bar( double d ) restrict(cpu,amp); // 2 : common subset of both
void some_method(array_view<double,2>& c) {
parallel_for_each( c.extent, [=](index<2> idx) restrict(amp)
{
//…
double d0 = c[idx];
double d1 = bar(d0); // ok, bar restrictions include amp
double d2 = cos(d0); // ok, chooses amp overload
//…
});
}
void MatrixMultiplySerial( vector<float>& vC,
const vector<float>& vA,
const vector<float>& vB, int M, int N, int W )
{
for (int row = 0; row < M; row++) {
for (int col = 0; col < N; col++){
float sum = 0.0f;
for(int i = 0; i < W; i++)
sum += vA[row * W + i] * vB[i * N + col];
vC[row * N + col] = sum;
}
}
}
void MatrixMultiplyAMP( vector<float>& vC,
const vector<float>& vA,
const vector<float>& vB, int M, int N, int W )
{
array_view<const float,2> a(M,W,vA),b(W,N,vB);
array_view<float,2> c(M,N,vC);
c.discard_data();
parallel_for_each(c.extent,
[=](index<2> idx) restrict(amp) {
int row = idx[0]; int col = idx[1];
float sum = 0.0f;
for(int i = 0; i < W; i++)
sum += a(row, i) * b(i, col);
c[idx] = sum;
}
);
}
CPUs
System memory
GPUPCIe
GPU
GPU
GPU
Host Accelerator (e.g. discrete G
// enumerate all accelerators
vector<accelerator> accs = accelerator::get_all();
// choose one based on your criteria
accelerator acc = accs[0];
// launch a kernel on it
parallel_for_each(acc.default_view, my_extent, [=]…);
DEMO
vector<int> v(5);
iota(v.begin(), v.end(), 0);
array<int, 1> a(5, v.begin(), v.end());
parallel_for_each(a.extent, [&](index<1> idx)
restrict (amp)
{
a[idx] = a[idx] * 2;
});
copy(a, v.begin());
vector<int> v(5);
iota(v.begin(), v.end(), 0);
array_view<int, 1> av(5, v);
parallel_for_each(av.extent, [=](index<1> idx)
restrict (amp)
{
av[idx] = av[idx] * 2;
});
av.synchronize();
1 accelerator_view acc_view = accelerator().default_view;
2
3 int a = 10;
4 std::vector<int> view_data(size);
5 array_view<int, 1> arr_view(size, view_data);
6 std::vector<int> arr_data(size);
7 array<int, 1> arr(size, arr_data.begin(),acc_view);
8
9 // ‘arr_view’ copied by value
10 // ‘a’ copied by value.
11 // ‘arr’ already allocated on accelerator and copied by ref
12
13 parallel_for_each(
14 arr.extent,
15 [=,&arr](index<1> idx) restrict(amp)
16 {
17 arr[idx] = arr_view[idx] + a;
18 arr_view[idx]++;
19 });
20
21 // ‘arr’ needs to be explictly copied back
22 copy(arr, arr_data.begin());
23
24 // ‘arr_view’ is synchronized back on first use
25 std::cout << arr_view[0] << std::endl;
1. #include <amp.h>
2. #include <amp_math.h>
3. using namespace concurrency;
4. using namespace concurrency::fast_math;
// using namespace
concurrency::precise_math;
5. int main() {
6. float a = 2.2f, b = 3.5f;
7. float result = pow(a,b);
8. std::vector<float> v(1);
9. array_view<float> av(1,v);
10. parallel_for_each(av.extent, [=](index<1>
idx) restrict(amp)
11. {
12. av[idx] = pow(a,b);
13. });
14. }
DEMO
C++ AMP 실천 및 적용 전략
C++ AMP 실천 및 적용 전략
C++ AMP 실천 및 적용 전략
C++ AMP 실천 및 적용 전략
C++ AMP 실천 및 적용 전략
C++ AMP 실천 및 적용 전략
C++ AMP 실천 및 적용 전략
C++ AMP 실천 및 적용 전략

More Related Content

What's hot

A Speculative Technique for Auto-Memoization Processor with Multithreading
A Speculative Technique for Auto-Memoization Processor with MultithreadingA Speculative Technique for Auto-Memoization Processor with Multithreading
A Speculative Technique for Auto-Memoization Processor with Multithreading
Matsuo and Tsumura lab.
 
Low-level Shader Optimization for Next-Gen and DX11 by Emil Persson
Low-level Shader Optimization for Next-Gen and DX11 by Emil PerssonLow-level Shader Optimization for Next-Gen and DX11 by Emil Persson
Low-level Shader Optimization for Next-Gen and DX11 by Emil Persson
AMD Developer Central
 
【論文紹介】Relay: A New IR for Machine Learning Frameworks
【論文紹介】Relay: A New IR for Machine Learning Frameworks【論文紹介】Relay: A New IR for Machine Learning Frameworks
【論文紹介】Relay: A New IR for Machine Learning Frameworks
Takeo Imai
 
OpenCL Heterogeneous Parallel Computing
OpenCL Heterogeneous Parallel ComputingOpenCL Heterogeneous Parallel Computing
OpenCL Heterogeneous Parallel Computing
João Paulo Leonidas Fernandes Dias da Silva
 
Introduction to Data Oriented Design
Introduction to Data Oriented DesignIntroduction to Data Oriented Design
Introduction to Data Oriented Design
Electronic Arts / DICE
 
A Step Towards Data Orientation
A Step Towards Data OrientationA Step Towards Data Orientation
A Step Towards Data Orientation
Electronic Arts / DICE
 
An Open Discussion of RISC-V BitManip, trends, and comparisons _ Claire
 An Open Discussion of RISC-V BitManip, trends, and comparisons _ Claire An Open Discussion of RISC-V BitManip, trends, and comparisons _ Claire
An Open Discussion of RISC-V BitManip, trends, and comparisons _ Claire
RISC-V International
 
第二回 冬のスイッチ大勉強会 - FullColorLED & MPU-6050編 -
第二回 冬のスイッチ大勉強会 - FullColorLED & MPU-6050編 -第二回 冬のスイッチ大勉強会 - FullColorLED & MPU-6050編 -
第二回 冬のスイッチ大勉強会 - FullColorLED & MPU-6050編 -
Wataru Kani
 
bpftrace - Tracing Summit 2018
bpftrace - Tracing Summit 2018bpftrace - Tracing Summit 2018
bpftrace - Tracing Summit 2018
AlastairRobertson9
 
C++ How I learned to stop worrying and love metaprogramming
C++ How I learned to stop worrying and love metaprogrammingC++ How I learned to stop worrying and love metaprogramming
C++ How I learned to stop worrying and love metaprogramming
cppfrug
 
What&rsquo;s new in Visual C++
What&rsquo;s new in Visual C++What&rsquo;s new in Visual C++
What&rsquo;s new in Visual C++
Microsoft
 
TVM VTA (TSIM)
TVM VTA (TSIM) TVM VTA (TSIM)
TVM VTA (TSIM)
Mr. Vengineer
 
PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...
Andrey Karpov
 
Конверсия управляемых языков в неуправляемые
Конверсия управляемых языков в неуправляемыеКонверсия управляемых языков в неуправляемые
Конверсия управляемых языков в неуправляемые
Platonov Sergey
 
Lec06
Lec06Lec06
Java lejos-multithreading
Java lejos-multithreadingJava lejos-multithreading
Java lejos-multithreadingMr. Chanuwan
 
Tiramisu をちょっと、味見してみました。
Tiramisu をちょっと、味見してみました。Tiramisu をちょっと、味見してみました。
Tiramisu をちょっと、味見してみました。
Mr. Vengineer
 
Introduction to Stockfish bitboard representation and magic bitboard
Introduction to Stockfish bitboard representation and magic bitboardIntroduction to Stockfish bitboard representation and magic bitboard
Introduction to Stockfish bitboard representation and magic bitboard
Taiyou Kuo
 
Design and Implementation of GCC Register Allocation
Design and Implementation of GCC Register AllocationDesign and Implementation of GCC Register Allocation
Design and Implementation of GCC Register Allocation
Kito Cheng
 
Q4.11: Using GCC Auto-Vectorizer
Q4.11: Using GCC Auto-VectorizerQ4.11: Using GCC Auto-Vectorizer
Q4.11: Using GCC Auto-Vectorizer
Linaro
 

What's hot (20)

A Speculative Technique for Auto-Memoization Processor with Multithreading
A Speculative Technique for Auto-Memoization Processor with MultithreadingA Speculative Technique for Auto-Memoization Processor with Multithreading
A Speculative Technique for Auto-Memoization Processor with Multithreading
 
Low-level Shader Optimization for Next-Gen and DX11 by Emil Persson
Low-level Shader Optimization for Next-Gen and DX11 by Emil PerssonLow-level Shader Optimization for Next-Gen and DX11 by Emil Persson
Low-level Shader Optimization for Next-Gen and DX11 by Emil Persson
 
【論文紹介】Relay: A New IR for Machine Learning Frameworks
【論文紹介】Relay: A New IR for Machine Learning Frameworks【論文紹介】Relay: A New IR for Machine Learning Frameworks
【論文紹介】Relay: A New IR for Machine Learning Frameworks
 
OpenCL Heterogeneous Parallel Computing
OpenCL Heterogeneous Parallel ComputingOpenCL Heterogeneous Parallel Computing
OpenCL Heterogeneous Parallel Computing
 
Introduction to Data Oriented Design
Introduction to Data Oriented DesignIntroduction to Data Oriented Design
Introduction to Data Oriented Design
 
A Step Towards Data Orientation
A Step Towards Data OrientationA Step Towards Data Orientation
A Step Towards Data Orientation
 
An Open Discussion of RISC-V BitManip, trends, and comparisons _ Claire
 An Open Discussion of RISC-V BitManip, trends, and comparisons _ Claire An Open Discussion of RISC-V BitManip, trends, and comparisons _ Claire
An Open Discussion of RISC-V BitManip, trends, and comparisons _ Claire
 
第二回 冬のスイッチ大勉強会 - FullColorLED & MPU-6050編 -
第二回 冬のスイッチ大勉強会 - FullColorLED & MPU-6050編 -第二回 冬のスイッチ大勉強会 - FullColorLED & MPU-6050編 -
第二回 冬のスイッチ大勉強会 - FullColorLED & MPU-6050編 -
 
bpftrace - Tracing Summit 2018
bpftrace - Tracing Summit 2018bpftrace - Tracing Summit 2018
bpftrace - Tracing Summit 2018
 
C++ How I learned to stop worrying and love metaprogramming
C++ How I learned to stop worrying and love metaprogrammingC++ How I learned to stop worrying and love metaprogramming
C++ How I learned to stop worrying and love metaprogramming
 
What&rsquo;s new in Visual C++
What&rsquo;s new in Visual C++What&rsquo;s new in Visual C++
What&rsquo;s new in Visual C++
 
TVM VTA (TSIM)
TVM VTA (TSIM) TVM VTA (TSIM)
TVM VTA (TSIM)
 
PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...
 
Конверсия управляемых языков в неуправляемые
Конверсия управляемых языков в неуправляемыеКонверсия управляемых языков в неуправляемые
Конверсия управляемых языков в неуправляемые
 
Lec06
Lec06Lec06
Lec06
 
Java lejos-multithreading
Java lejos-multithreadingJava lejos-multithreading
Java lejos-multithreading
 
Tiramisu をちょっと、味見してみました。
Tiramisu をちょっと、味見してみました。Tiramisu をちょっと、味見してみました。
Tiramisu をちょっと、味見してみました。
 
Introduction to Stockfish bitboard representation and magic bitboard
Introduction to Stockfish bitboard representation and magic bitboardIntroduction to Stockfish bitboard representation and magic bitboard
Introduction to Stockfish bitboard representation and magic bitboard
 
Design and Implementation of GCC Register Allocation
Design and Implementation of GCC Register AllocationDesign and Implementation of GCC Register Allocation
Design and Implementation of GCC Register Allocation
 
Q4.11: Using GCC Auto-Vectorizer
Q4.11: Using GCC Auto-VectorizerQ4.11: Using GCC Auto-Vectorizer
Q4.11: Using GCC Auto-Vectorizer
 

Similar to C++ AMP 실천 및 적용 전략

Blazing Fast Windows 8 Apps using Visual C++
Blazing Fast Windows 8 Apps using Visual C++Blazing Fast Windows 8 Apps using Visual C++
Blazing Fast Windows 8 Apps using Visual C++
Microsoft Developer Network (MSDN) - Belgium and Luxembourg
 
LSFMM 2019 BPF Observability
LSFMM 2019 BPF ObservabilityLSFMM 2019 BPF Observability
LSFMM 2019 BPF Observability
Brendan Gregg
 
Introduction to cuda geek camp singapore 2011
Introduction to cuda   geek camp singapore 2011Introduction to cuda   geek camp singapore 2011
Introduction to cuda geek camp singapore 2011
Raymond Tay
 
Introduction to CUDA
Introduction to CUDAIntroduction to CUDA
Introduction to CUDA
Raymond Tay
 
ISCA Final Presentaiton - Compilations
ISCA Final Presentaiton -  CompilationsISCA Final Presentaiton -  Compilations
ISCA Final Presentaiton - CompilationsHSA Foundation
 
Quiz 9
Quiz 9Quiz 9
Managing console
Managing consoleManaging console
Managing console
Shiva Saxena
 
Groovy
GroovyGroovy
Groovy
Zen Urban
 
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321Teddy Hsiung
 
Adam Sitnik "State of the .NET Performance"
Adam Sitnik "State of the .NET Performance"Adam Sitnik "State of the .NET Performance"
Adam Sitnik "State of the .NET Performance"
Yulia Tsisyk
 
State of the .Net Performance
State of the .Net PerformanceState of the .Net Performance
State of the .Net Performance
CUSTIS
 
Introduction to Accelerators
Introduction to AcceleratorsIntroduction to Accelerators
Introduction to Accelerators
Dilum Bandara
 
Getting Started with Raspberry Pi - DCC 2013.1
Getting Started with Raspberry Pi - DCC 2013.1Getting Started with Raspberry Pi - DCC 2013.1
Getting Started with Raspberry Pi - DCC 2013.1
Tom Paulus
 
Accelerating microbiome research with OpenACC
Accelerating microbiome research with OpenACCAccelerating microbiome research with OpenACC
Accelerating microbiome research with OpenACC
Igor Sfiligoi
 
Introduction to CUDA C: NVIDIA : Notes
Introduction to CUDA C: NVIDIA : NotesIntroduction to CUDA C: NVIDIA : Notes
Introduction to CUDA C: NVIDIA : Notes
Subhajit Sahu
 
Exploiting GPU's for Columnar DataFrrames by Kiran Lonikar
Exploiting GPU's for Columnar DataFrrames by Kiran LonikarExploiting GPU's for Columnar DataFrrames by Kiran Lonikar
Exploiting GPU's for Columnar DataFrrames by Kiran Lonikar
Spark Summit
 
Embedded c programming22 for fdp
Embedded c programming22 for fdpEmbedded c programming22 for fdp
Embedded c programming22 for fdpPradeep Kumar TS
 
Евгений Крутько, Многопоточные вычисления, современный подход.
Евгений Крутько, Многопоточные вычисления, современный подход.Евгений Крутько, Многопоточные вычисления, современный подход.
Евгений Крутько, Многопоточные вычисления, современный подход.
Platonov Sergey
 

Similar to C++ AMP 실천 및 적용 전략 (20)

Blazing Fast Windows 8 Apps using Visual C++
Blazing Fast Windows 8 Apps using Visual C++Blazing Fast Windows 8 Apps using Visual C++
Blazing Fast Windows 8 Apps using Visual C++
 
LSFMM 2019 BPF Observability
LSFMM 2019 BPF ObservabilityLSFMM 2019 BPF Observability
LSFMM 2019 BPF Observability
 
Introduction to cuda geek camp singapore 2011
Introduction to cuda   geek camp singapore 2011Introduction to cuda   geek camp singapore 2011
Introduction to cuda geek camp singapore 2011
 
Introduction to CUDA
Introduction to CUDAIntroduction to CUDA
Introduction to CUDA
 
ISCA Final Presentaiton - Compilations
ISCA Final Presentaiton -  CompilationsISCA Final Presentaiton -  Compilations
ISCA Final Presentaiton - Compilations
 
Quiz 9
Quiz 9Quiz 9
Quiz 9
 
Lecture 04
Lecture 04Lecture 04
Lecture 04
 
Managing console
Managing consoleManaging console
Managing console
 
Groovy
GroovyGroovy
Groovy
 
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
ExperiencesSharingOnEmbeddedSystemDevelopment_20160321
 
Adam Sitnik "State of the .NET Performance"
Adam Sitnik "State of the .NET Performance"Adam Sitnik "State of the .NET Performance"
Adam Sitnik "State of the .NET Performance"
 
State of the .Net Performance
State of the .Net PerformanceState of the .Net Performance
State of the .Net Performance
 
Introduction to Accelerators
Introduction to AcceleratorsIntroduction to Accelerators
Introduction to Accelerators
 
Getting Started with Raspberry Pi - DCC 2013.1
Getting Started with Raspberry Pi - DCC 2013.1Getting Started with Raspberry Pi - DCC 2013.1
Getting Started with Raspberry Pi - DCC 2013.1
 
Accelerating microbiome research with OpenACC
Accelerating microbiome research with OpenACCAccelerating microbiome research with OpenACC
Accelerating microbiome research with OpenACC
 
Cpp tutorial
Cpp tutorialCpp tutorial
Cpp tutorial
 
Introduction to CUDA C: NVIDIA : Notes
Introduction to CUDA C: NVIDIA : NotesIntroduction to CUDA C: NVIDIA : Notes
Introduction to CUDA C: NVIDIA : Notes
 
Exploiting GPU's for Columnar DataFrrames by Kiran Lonikar
Exploiting GPU's for Columnar DataFrrames by Kiran LonikarExploiting GPU's for Columnar DataFrrames by Kiran Lonikar
Exploiting GPU's for Columnar DataFrrames by Kiran Lonikar
 
Embedded c programming22 for fdp
Embedded c programming22 for fdpEmbedded c programming22 for fdp
Embedded c programming22 for fdp
 
Евгений Крутько, Многопоточные вычисления, современный подход.
Евгений Крутько, Многопоточные вычисления, современный подход.Евгений Крутько, Многопоточные вычисления, современный подход.
Евгений Крутько, Многопоточные вычисления, современный подход.
 

More from 명신 김

업무를 빼고 가치를 더하는 클라우드 기술
업무를 빼고 가치를 더하는 클라우드 기술업무를 빼고 가치를 더하는 클라우드 기술
업무를 빼고 가치를 더하는 클라우드 기술
명신 김
 
[2020 Ignite Seoul]Azure에서 사용할 수 있는 컨테이너/오케스트레이션 기술 살펴보기
[2020 Ignite Seoul]Azure에서 사용할 수 있는 컨테이너/오케스트레이션 기술 살펴보기[2020 Ignite Seoul]Azure에서 사용할 수 있는 컨테이너/오케스트레이션 기술 살펴보기
[2020 Ignite Seoul]Azure에서 사용할 수 있는 컨테이너/오케스트레이션 기술 살펴보기
명신 김
 
Best of Build Seoul 2019 Keynote
Best of Build Seoul 2019 KeynoteBest of Build Seoul 2019 Keynote
Best of Build Seoul 2019 Keynote
명신 김
 
Passwordless society
Passwordless societyPasswordless society
Passwordless society
명신 김
 
DevOps and Azure Devops 소개, 동향, 그리고 기대효과
DevOps and Azure Devops 소개, 동향, 그리고 기대효과DevOps and Azure Devops 소개, 동향, 그리고 기대효과
DevOps and Azure Devops 소개, 동향, 그리고 기대효과
명신 김
 
Serverless design and adoption
Serverless design and adoptionServerless design and adoption
Serverless design and adoption
명신 김
 
Durable functions
Durable functionsDurable functions
Durable functions
명신 김
 
Azure functions v2 announcement
Azure functions v2 announcementAzure functions v2 announcement
Azure functions v2 announcement
명신 김
 
Azure functions
Azure functionsAzure functions
Azure functions
명신 김
 
Logic apps
Logic appsLogic apps
Logic apps
명신 김
 
Serverless
ServerlessServerless
Serverless
명신 김
 
Azure event grid
Azure event gridAzure event grid
Azure event grid
명신 김
 
Serverless, Azure Functions, Logic Apps
Serverless, Azure Functions, Logic AppsServerless, Azure Functions, Logic Apps
Serverless, Azure Functions, Logic Apps
명신 김
 
Microservices architecture
Microservices architectureMicroservices architecture
Microservices architecture
명신 김
 
Visual studio 2015를 활용한 개발 생산성 및 코드 품질 혁신
Visual studio 2015를 활용한 개발 생산성 및 코드 품질 혁신Visual studio 2015를 활용한 개발 생산성 및 코드 품질 혁신
Visual studio 2015를 활용한 개발 생산성 및 코드 품질 혁신
명신 김
 
Connect(); 2016 한시간 총정리
Connect(); 2016 한시간 총정리Connect(); 2016 한시간 총정리
Connect(); 2016 한시간 총정리
명신 김
 
크로스 플랫폼을 품은 오픈 소스 프레임워크 .NET Core
크로스 플랫폼을 품은 오픈 소스 프레임워크 .NET Core크로스 플랫폼을 품은 오픈 소스 프레임워크 .NET Core
크로스 플랫폼을 품은 오픈 소스 프레임워크 .NET Core
명신 김
 
Coded UI test를 이용한 테스트 자동화
Coded UI test를 이용한 테스트 자동화Coded UI test를 이용한 테스트 자동화
Coded UI test를 이용한 테스트 자동화
명신 김
 
VS2015 C++ new features
VS2015 C++ new featuresVS2015 C++ new features
VS2015 C++ new features
명신 김
 
Welcome to the microsoft madness
Welcome to the microsoft madnessWelcome to the microsoft madness
Welcome to the microsoft madness
명신 김
 

More from 명신 김 (20)

업무를 빼고 가치를 더하는 클라우드 기술
업무를 빼고 가치를 더하는 클라우드 기술업무를 빼고 가치를 더하는 클라우드 기술
업무를 빼고 가치를 더하는 클라우드 기술
 
[2020 Ignite Seoul]Azure에서 사용할 수 있는 컨테이너/오케스트레이션 기술 살펴보기
[2020 Ignite Seoul]Azure에서 사용할 수 있는 컨테이너/오케스트레이션 기술 살펴보기[2020 Ignite Seoul]Azure에서 사용할 수 있는 컨테이너/오케스트레이션 기술 살펴보기
[2020 Ignite Seoul]Azure에서 사용할 수 있는 컨테이너/오케스트레이션 기술 살펴보기
 
Best of Build Seoul 2019 Keynote
Best of Build Seoul 2019 KeynoteBest of Build Seoul 2019 Keynote
Best of Build Seoul 2019 Keynote
 
Passwordless society
Passwordless societyPasswordless society
Passwordless society
 
DevOps and Azure Devops 소개, 동향, 그리고 기대효과
DevOps and Azure Devops 소개, 동향, 그리고 기대효과DevOps and Azure Devops 소개, 동향, 그리고 기대효과
DevOps and Azure Devops 소개, 동향, 그리고 기대효과
 
Serverless design and adoption
Serverless design and adoptionServerless design and adoption
Serverless design and adoption
 
Durable functions
Durable functionsDurable functions
Durable functions
 
Azure functions v2 announcement
Azure functions v2 announcementAzure functions v2 announcement
Azure functions v2 announcement
 
Azure functions
Azure functionsAzure functions
Azure functions
 
Logic apps
Logic appsLogic apps
Logic apps
 
Serverless
ServerlessServerless
Serverless
 
Azure event grid
Azure event gridAzure event grid
Azure event grid
 
Serverless, Azure Functions, Logic Apps
Serverless, Azure Functions, Logic AppsServerless, Azure Functions, Logic Apps
Serverless, Azure Functions, Logic Apps
 
Microservices architecture
Microservices architectureMicroservices architecture
Microservices architecture
 
Visual studio 2015를 활용한 개발 생산성 및 코드 품질 혁신
Visual studio 2015를 활용한 개발 생산성 및 코드 품질 혁신Visual studio 2015를 활용한 개발 생산성 및 코드 품질 혁신
Visual studio 2015를 활용한 개발 생산성 및 코드 품질 혁신
 
Connect(); 2016 한시간 총정리
Connect(); 2016 한시간 총정리Connect(); 2016 한시간 총정리
Connect(); 2016 한시간 총정리
 
크로스 플랫폼을 품은 오픈 소스 프레임워크 .NET Core
크로스 플랫폼을 품은 오픈 소스 프레임워크 .NET Core크로스 플랫폼을 품은 오픈 소스 프레임워크 .NET Core
크로스 플랫폼을 품은 오픈 소스 프레임워크 .NET Core
 
Coded UI test를 이용한 테스트 자동화
Coded UI test를 이용한 테스트 자동화Coded UI test를 이용한 테스트 자동화
Coded UI test를 이용한 테스트 자동화
 
VS2015 C++ new features
VS2015 C++ new featuresVS2015 C++ new features
VS2015 C++ new features
 
Welcome to the microsoft madness
Welcome to the microsoft madnessWelcome to the microsoft madness
Welcome to the microsoft madness
 

Recently uploaded

Enterprise Resource Planning System in Telangana
Enterprise Resource Planning System in TelanganaEnterprise Resource Planning System in Telangana
Enterprise Resource Planning System in Telangana
NYGGS Automation Suite
 
APIs for Browser Automation (MoT Meetup 2024)
APIs for Browser Automation (MoT Meetup 2024)APIs for Browser Automation (MoT Meetup 2024)
APIs for Browser Automation (MoT Meetup 2024)
Boni García
 
Globus Compute wth IRI Workflows - GlobusWorld 2024
Globus Compute wth IRI Workflows - GlobusWorld 2024Globus Compute wth IRI Workflows - GlobusWorld 2024
Globus Compute wth IRI Workflows - GlobusWorld 2024
Globus
 
Developing Distributed High-performance Computing Capabilities of an Open Sci...
Developing Distributed High-performance Computing Capabilities of an Open Sci...Developing Distributed High-performance Computing Capabilities of an Open Sci...
Developing Distributed High-performance Computing Capabilities of an Open Sci...
Globus
 
First Steps with Globus Compute Multi-User Endpoints
First Steps with Globus Compute Multi-User EndpointsFirst Steps with Globus Compute Multi-User Endpoints
First Steps with Globus Compute Multi-User Endpoints
Globus
 
GOING AOT WITH GRAALVM FOR SPRING BOOT (SPRING IO)
GOING AOT WITH GRAALVM FOR  SPRING BOOT (SPRING IO)GOING AOT WITH GRAALVM FOR  SPRING BOOT (SPRING IO)
GOING AOT WITH GRAALVM FOR SPRING BOOT (SPRING IO)
Alina Yurenko
 
2024 RoOUG Security model for the cloud.pptx
2024 RoOUG Security model for the cloud.pptx2024 RoOUG Security model for the cloud.pptx
2024 RoOUG Security model for the cloud.pptx
Georgi Kodinov
 
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
Globus
 
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI AppAI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
Google
 
Understanding Globus Data Transfers with NetSage
Understanding Globus Data Transfers with NetSageUnderstanding Globus Data Transfers with NetSage
Understanding Globus Data Transfers with NetSage
Globus
 
AI Pilot Review: The World’s First Virtual Assistant Marketing Suite
AI Pilot Review: The World’s First Virtual Assistant Marketing SuiteAI Pilot Review: The World’s First Virtual Assistant Marketing Suite
AI Pilot Review: The World’s First Virtual Assistant Marketing Suite
Google
 
Climate Science Flows: Enabling Petabyte-Scale Climate Analysis with the Eart...
Climate Science Flows: Enabling Petabyte-Scale Climate Analysis with the Eart...Climate Science Flows: Enabling Petabyte-Scale Climate Analysis with the Eart...
Climate Science Flows: Enabling Petabyte-Scale Climate Analysis with the Eart...
Globus
 
BoxLang: Review our Visionary Licenses of 2024
BoxLang: Review our Visionary Licenses of 2024BoxLang: Review our Visionary Licenses of 2024
BoxLang: Review our Visionary Licenses of 2024
Ortus Solutions, Corp
 
Essentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FMEEssentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FME
Safe Software
 
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoamOpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
takuyayamamoto1800
 
openEuler Case Study - The Journey to Supply Chain Security
openEuler Case Study - The Journey to Supply Chain SecurityopenEuler Case Study - The Journey to Supply Chain Security
openEuler Case Study - The Journey to Supply Chain Security
Shane Coughlan
 
Prosigns: Transforming Business with Tailored Technology Solutions
Prosigns: Transforming Business with Tailored Technology SolutionsProsigns: Transforming Business with Tailored Technology Solutions
Prosigns: Transforming Business with Tailored Technology Solutions
Prosigns
 
Introducing Crescat - Event Management Software for Venues, Festivals and Eve...
Introducing Crescat - Event Management Software for Venues, Festivals and Eve...Introducing Crescat - Event Management Software for Venues, Festivals and Eve...
Introducing Crescat - Event Management Software for Venues, Festivals and Eve...
Crescat
 
Innovating Inference - Remote Triggering of Large Language Models on HPC Clus...
Innovating Inference - Remote Triggering of Large Language Models on HPC Clus...Innovating Inference - Remote Triggering of Large Language Models on HPC Clus...
Innovating Inference - Remote Triggering of Large Language Models on HPC Clus...
Globus
 
Lecture 1 Introduction to games development
Lecture 1 Introduction to games developmentLecture 1 Introduction to games development
Lecture 1 Introduction to games development
abdulrafaychaudhry
 

Recently uploaded (20)

Enterprise Resource Planning System in Telangana
Enterprise Resource Planning System in TelanganaEnterprise Resource Planning System in Telangana
Enterprise Resource Planning System in Telangana
 
APIs for Browser Automation (MoT Meetup 2024)
APIs for Browser Automation (MoT Meetup 2024)APIs for Browser Automation (MoT Meetup 2024)
APIs for Browser Automation (MoT Meetup 2024)
 
Globus Compute wth IRI Workflows - GlobusWorld 2024
Globus Compute wth IRI Workflows - GlobusWorld 2024Globus Compute wth IRI Workflows - GlobusWorld 2024
Globus Compute wth IRI Workflows - GlobusWorld 2024
 
Developing Distributed High-performance Computing Capabilities of an Open Sci...
Developing Distributed High-performance Computing Capabilities of an Open Sci...Developing Distributed High-performance Computing Capabilities of an Open Sci...
Developing Distributed High-performance Computing Capabilities of an Open Sci...
 
First Steps with Globus Compute Multi-User Endpoints
First Steps with Globus Compute Multi-User EndpointsFirst Steps with Globus Compute Multi-User Endpoints
First Steps with Globus Compute Multi-User Endpoints
 
GOING AOT WITH GRAALVM FOR SPRING BOOT (SPRING IO)
GOING AOT WITH GRAALVM FOR  SPRING BOOT (SPRING IO)GOING AOT WITH GRAALVM FOR  SPRING BOOT (SPRING IO)
GOING AOT WITH GRAALVM FOR SPRING BOOT (SPRING IO)
 
2024 RoOUG Security model for the cloud.pptx
2024 RoOUG Security model for the cloud.pptx2024 RoOUG Security model for the cloud.pptx
2024 RoOUG Security model for the cloud.pptx
 
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
Exploring Innovations in Data Repository Solutions - Insights from the U.S. G...
 
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI AppAI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
 
Understanding Globus Data Transfers with NetSage
Understanding Globus Data Transfers with NetSageUnderstanding Globus Data Transfers with NetSage
Understanding Globus Data Transfers with NetSage
 
AI Pilot Review: The World’s First Virtual Assistant Marketing Suite
AI Pilot Review: The World’s First Virtual Assistant Marketing SuiteAI Pilot Review: The World’s First Virtual Assistant Marketing Suite
AI Pilot Review: The World’s First Virtual Assistant Marketing Suite
 
Climate Science Flows: Enabling Petabyte-Scale Climate Analysis with the Eart...
Climate Science Flows: Enabling Petabyte-Scale Climate Analysis with the Eart...Climate Science Flows: Enabling Petabyte-Scale Climate Analysis with the Eart...
Climate Science Flows: Enabling Petabyte-Scale Climate Analysis with the Eart...
 
BoxLang: Review our Visionary Licenses of 2024
BoxLang: Review our Visionary Licenses of 2024BoxLang: Review our Visionary Licenses of 2024
BoxLang: Review our Visionary Licenses of 2024
 
Essentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FMEEssentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FME
 
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoamOpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
OpenFOAM solver for Helmholtz equation, helmholtzFoam / helmholtzBubbleFoam
 
openEuler Case Study - The Journey to Supply Chain Security
openEuler Case Study - The Journey to Supply Chain SecurityopenEuler Case Study - The Journey to Supply Chain Security
openEuler Case Study - The Journey to Supply Chain Security
 
Prosigns: Transforming Business with Tailored Technology Solutions
Prosigns: Transforming Business with Tailored Technology SolutionsProsigns: Transforming Business with Tailored Technology Solutions
Prosigns: Transforming Business with Tailored Technology Solutions
 
Introducing Crescat - Event Management Software for Venues, Festivals and Eve...
Introducing Crescat - Event Management Software for Venues, Festivals and Eve...Introducing Crescat - Event Management Software for Venues, Festivals and Eve...
Introducing Crescat - Event Management Software for Venues, Festivals and Eve...
 
Innovating Inference - Remote Triggering of Large Language Models on HPC Clus...
Innovating Inference - Remote Triggering of Large Language Models on HPC Clus...Innovating Inference - Remote Triggering of Large Language Models on HPC Clus...
Innovating Inference - Remote Triggering of Large Language Models on HPC Clus...
 
Lecture 1 Introduction to games development
Lecture 1 Introduction to games developmentLecture 1 Introduction to games development
Lecture 1 Introduction to games development
 

C++ AMP 실천 및 적용 전략

  • 1.
  • 4. 1971, Intel, 4004 4bits processor 740Khz(8 clock cycle/instruction cycle) 2012, Intel, 80637 (Ivy Bridge) 64bits processor 4.1Ghz
  • 5.
  • 6.
  • 9.
  • 11.
  • 13. void AddArrays(int n, int * pA, int * pB, int * pC) { for (int i=0; i<n; i++) { pC[i] = pA[i] + pB[i]; } } void AddArrays(int n, int * pA, int * pB, int * pC) { for (int i=0; i<n; i++) { pC[i] = pA[i] + pB[i]; } } #include <amp.h> using namespace concurrency; void AddArrays(int n, int * pA, int * pB, int * pC) { array_view<int,1> a(n, pA); array_view<int,1> b(n, pB); array_view<int,1> sum(n, pC); parallel_for_each( sum.extent, [=](index<1> i) restrict(amp) { sum[i] = a[i] + b[i]; } ); }
  • 14. void AddArrays(int n, int * pA, int * pB, int * pC) { array_view<int,1> a(n, pA); array_view<int,1> b(n, pB); array_view<int,1> sum(n, pC); parallel_for_each( sum.extent, [=](index<1> i) restrict(amp) { sum[i] = a[i] + b[i]; } ); } parallel_for_each: execute the lambda on the accelerator once per thread extent: the number and shape of threads to execute the lambda index: the thread ID that is running the lambda, used to index into data restrict(amp): tells the compiler to check that this code conforms to C++ AMP language restriction array_view: wrap the data to operate on the accelerator array_view variables captured and associated data copied to accelerator(on demand)
  • 15. index<1> i(2); index<3> i(2,0,1); extent<3> e(3,2,2); index<2> i(0,2); extent<2> e(3,4);extent<1> e(6); N <= 128
  • 16. vector<int> v(10); extent<2> e(2,5); array_view<int,2> a(e, v); //above two lines can also be written //array_view<int,2> a(2,5,v); index<2> i(1,3); int o = a[i]; // or a[i] = 16; //or int o = a(1, 3);
  • 17. 1. parallel_for_each( 2. e, // e is of type extent<N> 3. [ ](index<N> idx) restrict(amp) { // kernel code } 1. );
  • 18.
  • 19.
  • 20. • No • recursion • 'volatile' • virtual functions • pointers to functions • pointers to member functions • pointers in structs • pointers to pointers • bitfields • No • goto or labeled statements • throw, try, catch • globals or statics • dynamic_cast or typeid • asm declarations • varargs • unsupported types • e.g. char, short, long double
  • 21. double cos( double d ); // 1a: cpu code double cos( double d ) restrict(amp); // 1b: amp code double bar( double d ) restrict(cpu,amp); // 2 : common subset of both void some_method(array_view<double,2>& c) { parallel_for_each( c.extent, [=](index<2> idx) restrict(amp) { //… double d0 = c[idx]; double d1 = bar(d0); // ok, bar restrictions include amp double d2 = cos(d0); // ok, chooses amp overload //… }); }
  • 22. void MatrixMultiplySerial( vector<float>& vC, const vector<float>& vA, const vector<float>& vB, int M, int N, int W ) { for (int row = 0; row < M; row++) { for (int col = 0; col < N; col++){ float sum = 0.0f; for(int i = 0; i < W; i++) sum += vA[row * W + i] * vB[i * N + col]; vC[row * N + col] = sum; } } } void MatrixMultiplyAMP( vector<float>& vC, const vector<float>& vA, const vector<float>& vB, int M, int N, int W ) { array_view<const float,2> a(M,W,vA),b(W,N,vB); array_view<float,2> c(M,N,vC); c.discard_data(); parallel_for_each(c.extent, [=](index<2> idx) restrict(amp) { int row = idx[0]; int col = idx[1]; float sum = 0.0f; for(int i = 0; i < W; i++) sum += a(row, i) * b(i, col); c[idx] = sum; } ); }
  • 24. // enumerate all accelerators vector<accelerator> accs = accelerator::get_all(); // choose one based on your criteria accelerator acc = accs[0]; // launch a kernel on it parallel_for_each(acc.default_view, my_extent, [=]…);
  • 25. DEMO
  • 26.
  • 27. vector<int> v(5); iota(v.begin(), v.end(), 0); array<int, 1> a(5, v.begin(), v.end()); parallel_for_each(a.extent, [&](index<1> idx) restrict (amp) { a[idx] = a[idx] * 2; }); copy(a, v.begin()); vector<int> v(5); iota(v.begin(), v.end(), 0); array_view<int, 1> av(5, v); parallel_for_each(av.extent, [=](index<1> idx) restrict (amp) { av[idx] = av[idx] * 2; }); av.synchronize();
  • 28. 1 accelerator_view acc_view = accelerator().default_view; 2 3 int a = 10; 4 std::vector<int> view_data(size); 5 array_view<int, 1> arr_view(size, view_data); 6 std::vector<int> arr_data(size); 7 array<int, 1> arr(size, arr_data.begin(),acc_view); 8
  • 29. 9 // ‘arr_view’ copied by value 10 // ‘a’ copied by value. 11 // ‘arr’ already allocated on accelerator and copied by ref 12 13 parallel_for_each( 14 arr.extent, 15 [=,&arr](index<1> idx) restrict(amp) 16 { 17 arr[idx] = arr_view[idx] + a; 18 arr_view[idx]++; 19 }); 20
  • 30. 21 // ‘arr’ needs to be explictly copied back 22 copy(arr, arr_data.begin()); 23 24 // ‘arr_view’ is synchronized back on first use 25 std::cout << arr_view[0] << std::endl;
  • 31. 1. #include <amp.h> 2. #include <amp_math.h> 3. using namespace concurrency; 4. using namespace concurrency::fast_math; // using namespace concurrency::precise_math; 5. int main() { 6. float a = 2.2f, b = 3.5f; 7. float result = pow(a,b); 8. std::vector<float> v(1); 9. array_view<float> av(1,v); 10. parallel_for_each(av.extent, [=](index<1> idx) restrict(amp) 11. { 12. av[idx] = pow(a,b); 13. }); 14. }
  • 32.
  • 33.
  • 34.
  • 35.
  • 36.
  • 37.
  • 38. DEMO