SlideShare a Scribd company logo
1 of 6
Download to read offline
CUDA – First Programs
“Hello, world” is traditionally the first program we write. We can do the same for CUDA. Here it is:
In file hello.cu:
#include "stdio.h"
int main()
{
printf("Hello, worldn");
return 0;
}
On our Tesla machine, you can compile and this with:
$ nvcc hello.cu
$ ./a.out
You can change the output file name with the –o flag: nvcc –o hello hello.cu
If you edit your .bashrc file you can also add your current directory to your path if you don’t want to
have to type the preceding . all of the time, which refers to the current working directory. Add
export PATH=$PATH:.
To the .bashrc file. Some would recommend not doing this for security purposes.
You might consider this program to be cheating, since it doesn’t really use any CUDA functionality.
Everything runs on the host. However, the point is that CUDA C programs can do everything a regular C
program can do.
Here is a slightly more interesting (but inefficient and only useful as an example) program that adds two
numbers together using a kernel function:
#include "stdio.h"
__global__ void add(int a, int b, int *c)
{
*c = a + b;
}
int main()
{
int a,b,c;
int *dev_c;
a=3;
b=4;
cudaMalloc((void**)&dev_c, sizeof(int));
add<<<1,1>>>(a,b,dev_c);
cudaMemcpy(&c, dev_c, sizeof(int), cudaMemcpyDeviceToHost);
printf("%d + %d is %dn", a, b, c);
cudaFree(dev_c);
return 0;
}
To do in class: walk through program; show similar program in straight C and it runs much faster! Why?
cudaMalloc returns cudaSuccess if it was successful; could check to ensure that the program will run
correctly.
Example: Summing Vectors
This is a simple problem. Given two vectors (i.e. arrays), we would like to add them together in a third
array. For example:
A = {0, 2, 4, 6, 8}
B = {1, 1, 2, 2, 1}
Then A + B =
C = {1, 3, 6, 8, 9}
In this example the array is 5 elements long, so our approach will be to create 5 different threads. The
first thread is responsible for computing C[0] = A[0] + B[0]. The second thread is responsible for
computing C[1] = A[1] + B[1], and so forth.
Here is how we can do this with traditional C code:
#include "stdio.h"
#define N 10
void add(int *a, int *b, int *c)
{
int tID = 0;
while (tID < N)
{
c[tID] = a[tID] + b[tID];
tID += 1;
}
}
int main()
{
int a[N], b[N], c[N];
// Fill Arrays
for (int i = 0; i < N; i++)
{
a[i] = i,
b[i] = 1;
}
add (a, b, c);
for (int i = 0; i < N; i++)
{
printf("%d + %d = %dn", a[i], b[i], c[i]);
}
return 0;
}
This is a rather roundabout way to add two arrays – our reason is because this will translate a little nicer
to the CUDA version. To compile and run it, we have to use g++ (since it uses some C++ style notations
that don’t work in C). Here is the CUDA version:
#include "stdio.h"
#define N 10
__global__ void add(int *a, int *b, int *c)
{
int tID = blockIdx.x;
if (tID < N)
{
c[tID] = a[tID] + b[tID];
}
}
int main()
{
int a[N], b[N], c[N];
int *dev_a, *dev_b, *dev_c;
cudaMalloc((void **) &dev_a, N*sizeof(int));
cudaMalloc((void **) &dev_b, N*sizeof(int));
cudaMalloc((void **) &dev_c, N*sizeof(int));
// Fill Arrays
for (int i = 0; i < N; i++)
{
a[i] = i,
b[i] = 1;
}
cudaMemcpy(dev_a, a, N*sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(dev_b, b, N*sizeof(int), cudaMemcpyHostToDevice);
add<<<N,1>>>(dev_a, dev_b, dev_c);
cudaMemcpy(c, dev_c, N*sizeof(int), cudaMemcpyDeviceToHost);
for (int i = 0; i < N; i++)
{
printf("%d + %d = %dn", a[i], b[i], c[i]);
}
return 0;
}
blockIDx.x gives us the Block ID, which ranges from 0 to N-1. What if we used add<<<1,N>>>
instead? Then we can access by the ThreadID which is the variable threadIDx.x.
As another example, let’s add two 2D arrays. We can define a 2D array of ints as follows:
int c[2][3];
The following code illustrates how the 2D array is laid out in memory:
for (int i=0; i < 2; i++)
for (int j=0; j< 3; j++)
printf("[%d][%d] at %ldn",i,j,&c[i][j]);
Output:
[0][0] at 140733933298160
[0][1] at 140733933298164
[0][2] at 140733933298168
[1][0] at 140733933298172
[1][1] at 140733933298176
[1][2] at 140733933298180
We can see that we have a layout where the next cell in the j dimension occupies the next sequential
integer in memory, where an int is 4 bytes:
c[0][0] at &c c[0][1] at &c + 4 c[0][2] at &c + 8
c[1][0] at &c + 12 c[1][1] at &c + 16 c[1][2] at &c + 20
In general, the address of a cell can be computed by:
&c + [(sizeof(int) * sizeof-j-dimension * i] + (sizeof(int)) * j
In our example the size of the j dimension is 3. For example, the cell at c[1][1] would be combined as
the base address + (4*3*1) + (4*1) = &c+16.
C will do the addressing for us if we use the array notation, so if INDEX=i*WIDTH + J
then we can access the element via: c[INDEX]
CUDA requires we allocate memory as a one-dimensional array, so we can use the mapping above to a
2D array.
To make the mapping a little easier in the kernel function we can declare the blocks to be in a grid that is
the same dimensions as the 2D array. This will create variables blockIdx.x and blockIdx.y that
correspond to the width and height of the array.
#include "stdio.h"
#define COLUMNS 3
#define ROWS 2
__global__ void add(int *a, int *b, int *c)
{
int x = blockIdx.x;
int y = blockIdx.y;
int i = (COLUMNS*y) + x;
c[i] = a[i] + b[i];
}
int main()
{
int a[ROWS][COLUMNS], b[ROWS][COLUMNS], c[ROWS][COLUMNS];
int *dev_a, *dev_b, *dev_c;
cudaMalloc((void **) &dev_a, ROWS*COLUMNS*sizeof(int));
cudaMalloc((void **) &dev_b, ROWS*COLUMNS*sizeof(int));
cudaMalloc((void **) &dev_c, ROWS*COLUMNS*sizeof(int));
for (int y = 0; y < ROWS; y++) // Fill Arrays
for (int x = 0; x < COLUMNS; x++)
{
a[y][x] = x;
b[y][x] = y;
}
cudaMemcpy(dev_a, a, ROWS*COLUMNS*sizeof(int),
cudaMemcpyHostToDevice);
cudaMemcpy(dev_b, b, ROWS*COLUMNS*sizeof(int),
cudaMemcpyHostToDevice);
dim3 grid(COLUMNS,ROWS);
add<<<grid,1>>>(dev_a, dev_b, dev_c);
cudaMemcpy(c, dev_c, ROWS*COLUMNS*sizeof(int),
cudaMemcpyDeviceToHost);
for (int y = 0; y < ROWS; y++) // Output Arrays
{
for (int x = 0; x < COLUMNS; x++)
{
printf("[%d][%d]=%d ",y,x,c[y][x]);
}
printf("n");
}
return 0;
}

More Related Content

What's hot

ggtimeseries-->ggplot2 extensions
ggtimeseries-->ggplot2 extensions ggtimeseries-->ggplot2 extensions
ggtimeseries-->ggplot2 extensions Dr. Volkan OBAN
 
Doubly linklist
Doubly linklistDoubly linklist
Doubly linklistilsamaryum
 
Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.Dr. Volkan OBAN
 
python高级内存管理
python高级内存管理python高级内存管理
python高级内存管理rfyiamcool
 
Advanced Data Visualization in R- Somes Examples.
Advanced Data Visualization in R- Somes Examples.Advanced Data Visualization in R- Somes Examples.
Advanced Data Visualization in R- Somes Examples.Dr. Volkan OBAN
 
Frsa
FrsaFrsa
Frsa_111
 
OOP 2012 - Hint: Dynamic allocation in c++
OOP 2012 - Hint: Dynamic allocation in c++OOP 2012 - Hint: Dynamic allocation in c++
OOP 2012 - Hint: Dynamic allocation in c++Allan Sun
 
Pointer Events in Canvas
Pointer Events in CanvasPointer Events in Canvas
Pointer Events in Canvasdeanhudson
 
Advanced Data Visualization Examples with R-Part II
Advanced Data Visualization Examples with R-Part IIAdvanced Data Visualization Examples with R-Part II
Advanced Data Visualization Examples with R-Part IIDr. Volkan OBAN
 
Scientific Computing with Python Webinar March 19: 3D Visualization with Mayavi
Scientific Computing with Python Webinar March 19: 3D Visualization with MayaviScientific Computing with Python Webinar March 19: 3D Visualization with Mayavi
Scientific Computing with Python Webinar March 19: 3D Visualization with MayaviEnthought, Inc.
 
Class array
Class arrayClass array
Class arraynky92
 
Generative Adversarial Nets
Generative Adversarial NetsGenerative Adversarial Nets
Generative Adversarial NetsJinho Lee
 

What's hot (20)

ggtimeseries-->ggplot2 extensions
ggtimeseries-->ggplot2 extensions ggtimeseries-->ggplot2 extensions
ggtimeseries-->ggplot2 extensions
 
Doubly linklist
Doubly linklistDoubly linklist
Doubly linklist
 
Arrays
ArraysArrays
Arrays
 
Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.
 
C++ TUTORIAL 10
C++ TUTORIAL 10C++ TUTORIAL 10
C++ TUTORIAL 10
 
C++ TUTORIAL 7
C++ TUTORIAL 7C++ TUTORIAL 7
C++ TUTORIAL 7
 
Python Tidbits
Python TidbitsPython Tidbits
Python Tidbits
 
Basic Calculus in R.
Basic Calculus in R. Basic Calculus in R.
Basic Calculus in R.
 
python高级内存管理
python高级内存管理python高级内存管理
python高级内存管理
 
NumPy Refresher
NumPy RefresherNumPy Refresher
NumPy Refresher
 
Advanced Data Visualization in R- Somes Examples.
Advanced Data Visualization in R- Somes Examples.Advanced Data Visualization in R- Somes Examples.
Advanced Data Visualization in R- Somes Examples.
 
Frsa
FrsaFrsa
Frsa
 
OOP 2012 - Hint: Dynamic allocation in c++
OOP 2012 - Hint: Dynamic allocation in c++OOP 2012 - Hint: Dynamic allocation in c++
OOP 2012 - Hint: Dynamic allocation in c++
 
Pointer Events in Canvas
Pointer Events in CanvasPointer Events in Canvas
Pointer Events in Canvas
 
Advanced Data Visualization Examples with R-Part II
Advanced Data Visualization Examples with R-Part IIAdvanced Data Visualization Examples with R-Part II
Advanced Data Visualization Examples with R-Part II
 
Scientific Computing with Python Webinar March 19: 3D Visualization with Mayavi
Scientific Computing with Python Webinar March 19: 3D Visualization with MayaviScientific Computing with Python Webinar March 19: 3D Visualization with Mayavi
Scientific Computing with Python Webinar March 19: 3D Visualization with Mayavi
 
C++ TUTORIAL 1
C++ TUTORIAL 1C++ TUTORIAL 1
C++ TUTORIAL 1
 
C++ TUTORIAL 9
C++ TUTORIAL 9C++ TUTORIAL 9
C++ TUTORIAL 9
 
Class array
Class arrayClass array
Class array
 
Generative Adversarial Nets
Generative Adversarial NetsGenerative Adversarial Nets
Generative Adversarial Nets
 

Similar to CUDA First Programs: Computer Architecture CSE448 : UAA Alaska : Notes

CUDA by Example : Parallel Programming in CUDA C : Notes
CUDA by Example : Parallel Programming in CUDA C : NotesCUDA by Example : Parallel Programming in CUDA C : Notes
CUDA by Example : Parallel Programming in CUDA C : NotesSubhajit Sahu
 
Introduction to CUDA C: NVIDIA : Notes
Introduction to CUDA C: NVIDIA : NotesIntroduction to CUDA C: NVIDIA : Notes
Introduction to CUDA C: NVIDIA : NotesSubhajit Sahu
 
Mouse programming in c
Mouse programming in cMouse programming in c
Mouse programming in cgkgaur1987
 
"Optimization of a .NET application- is it simple ! / ?", Yevhen Tatarynov
"Optimization of a .NET application- is it simple ! / ?",  Yevhen Tatarynov"Optimization of a .NET application- is it simple ! / ?",  Yevhen Tatarynov
"Optimization of a .NET application- is it simple ! / ?", Yevhen TatarynovFwdays
 
FALLSEM2022-23_BCSE202L_TH_VL2022230103292_Reference_Material_I_08-08-2022_C_...
FALLSEM2022-23_BCSE202L_TH_VL2022230103292_Reference_Material_I_08-08-2022_C_...FALLSEM2022-23_BCSE202L_TH_VL2022230103292_Reference_Material_I_08-08-2022_C_...
FALLSEM2022-23_BCSE202L_TH_VL2022230103292_Reference_Material_I_08-08-2022_C_...AntareepMajumder
 
Question In C Programming In mathematics, a set is a colle...Sav.pdf
Question In C Programming In mathematics, a set is a colle...Sav.pdfQuestion In C Programming In mathematics, a set is a colle...Sav.pdf
Question In C Programming In mathematics, a set is a colle...Sav.pdfarihantcomp1008
 
DoublyList-cpp- #include -DoublyList-h- using namespace std- void Doub.pdf
DoublyList-cpp- #include -DoublyList-h- using namespace std- void Doub.pdfDoublyList-cpp- #include -DoublyList-h- using namespace std- void Doub.pdf
DoublyList-cpp- #include -DoublyList-h- using namespace std- void Doub.pdfaathiauto
 
1sequences and sampling. Suppose we went to sample the x-axis from X.pdf
1sequences and sampling. Suppose we went to sample the x-axis from X.pdf1sequences and sampling. Suppose we went to sample the x-axis from X.pdf
1sequences and sampling. Suppose we went to sample the x-axis from X.pdfrushabhshah600
 
Data structures KTU chapter2.PPT
Data structures KTU chapter2.PPTData structures KTU chapter2.PPT
Data structures KTU chapter2.PPTAlbin562191
 
Lecture 5Arrays on c++ for Beginner.pptx
Lecture 5Arrays on c++ for Beginner.pptxLecture 5Arrays on c++ for Beginner.pptx
Lecture 5Arrays on c++ for Beginner.pptxarjurakibulhasanrrr7
 
CUDA by Example : Thread Cooperation : Notes
CUDA by Example : Thread Cooperation : NotesCUDA by Example : Thread Cooperation : Notes
CUDA by Example : Thread Cooperation : NotesSubhajit Sahu
 

Similar to CUDA First Programs: Computer Architecture CSE448 : UAA Alaska : Notes (20)

CUDA by Example : Parallel Programming in CUDA C : Notes
CUDA by Example : Parallel Programming in CUDA C : NotesCUDA by Example : Parallel Programming in CUDA C : Notes
CUDA by Example : Parallel Programming in CUDA C : Notes
 
Introduction to CUDA C: NVIDIA : Notes
Introduction to CUDA C: NVIDIA : NotesIntroduction to CUDA C: NVIDIA : Notes
Introduction to CUDA C: NVIDIA : Notes
 
Lecture 1 mte 407
Lecture 1 mte 407Lecture 1 mte 407
Lecture 1 mte 407
 
Lecture 1 mte 407
Lecture 1 mte 407Lecture 1 mte 407
Lecture 1 mte 407
 
Array
ArrayArray
Array
 
CS.3.Arrays.pdf
CS.3.Arrays.pdfCS.3.Arrays.pdf
CS.3.Arrays.pdf
 
Mouse programming in c
Mouse programming in cMouse programming in c
Mouse programming in c
 
"Optimization of a .NET application- is it simple ! / ?", Yevhen Tatarynov
"Optimization of a .NET application- is it simple ! / ?",  Yevhen Tatarynov"Optimization of a .NET application- is it simple ! / ?",  Yevhen Tatarynov
"Optimization of a .NET application- is it simple ! / ?", Yevhen Tatarynov
 
array2d.ppt
array2d.pptarray2d.ppt
array2d.ppt
 
FALLSEM2022-23_BCSE202L_TH_VL2022230103292_Reference_Material_I_08-08-2022_C_...
FALLSEM2022-23_BCSE202L_TH_VL2022230103292_Reference_Material_I_08-08-2022_C_...FALLSEM2022-23_BCSE202L_TH_VL2022230103292_Reference_Material_I_08-08-2022_C_...
FALLSEM2022-23_BCSE202L_TH_VL2022230103292_Reference_Material_I_08-08-2022_C_...
 
Introduction to c part 2
Introduction to c   part  2Introduction to c   part  2
Introduction to c part 2
 
Question In C Programming In mathematics, a set is a colle...Sav.pdf
Question In C Programming In mathematics, a set is a colle...Sav.pdfQuestion In C Programming In mathematics, a set is a colle...Sav.pdf
Question In C Programming In mathematics, a set is a colle...Sav.pdf
 
DoublyList-cpp- #include -DoublyList-h- using namespace std- void Doub.pdf
DoublyList-cpp- #include -DoublyList-h- using namespace std- void Doub.pdfDoublyList-cpp- #include -DoublyList-h- using namespace std- void Doub.pdf
DoublyList-cpp- #include -DoublyList-h- using namespace std- void Doub.pdf
 
Unit 2
Unit 2Unit 2
Unit 2
 
1sequences and sampling. Suppose we went to sample the x-axis from X.pdf
1sequences and sampling. Suppose we went to sample the x-axis from X.pdf1sequences and sampling. Suppose we went to sample the x-axis from X.pdf
1sequences and sampling. Suppose we went to sample the x-axis from X.pdf
 
Chapter2
Chapter2Chapter2
Chapter2
 
Data structures KTU chapter2.PPT
Data structures KTU chapter2.PPTData structures KTU chapter2.PPT
Data structures KTU chapter2.PPT
 
Lecture 5Arrays on c++ for Beginner.pptx
Lecture 5Arrays on c++ for Beginner.pptxLecture 5Arrays on c++ for Beginner.pptx
Lecture 5Arrays on c++ for Beginner.pptx
 
Cpp tutorial
Cpp tutorialCpp tutorial
Cpp tutorial
 
CUDA by Example : Thread Cooperation : Notes
CUDA by Example : Thread Cooperation : NotesCUDA by Example : Thread Cooperation : Notes
CUDA by Example : Thread Cooperation : Notes
 

More from Subhajit Sahu

DyGraph: A Dynamic Graph Generator and Benchmark Suite : NOTES
DyGraph: A Dynamic Graph Generator and Benchmark Suite : NOTESDyGraph: A Dynamic Graph Generator and Benchmark Suite : NOTES
DyGraph: A Dynamic Graph Generator and Benchmark Suite : NOTESSubhajit Sahu
 
Shared memory Parallelism (NOTES)
Shared memory Parallelism (NOTES)Shared memory Parallelism (NOTES)
Shared memory Parallelism (NOTES)Subhajit Sahu
 
A Dynamic Algorithm for Local Community Detection in Graphs : NOTES
A Dynamic Algorithm for Local Community Detection in Graphs : NOTESA Dynamic Algorithm for Local Community Detection in Graphs : NOTES
A Dynamic Algorithm for Local Community Detection in Graphs : NOTESSubhajit Sahu
 
Scalable Static and Dynamic Community Detection Using Grappolo : NOTES
Scalable Static and Dynamic Community Detection Using Grappolo : NOTESScalable Static and Dynamic Community Detection Using Grappolo : NOTES
Scalable Static and Dynamic Community Detection Using Grappolo : NOTESSubhajit Sahu
 
Application Areas of Community Detection: A Review : NOTES
Application Areas of Community Detection: A Review : NOTESApplication Areas of Community Detection: A Review : NOTES
Application Areas of Community Detection: A Review : NOTESSubhajit Sahu
 
Community Detection on the GPU : NOTES
Community Detection on the GPU : NOTESCommunity Detection on the GPU : NOTES
Community Detection on the GPU : NOTESSubhajit Sahu
 
Survey for extra-child-process package : NOTES
Survey for extra-child-process package : NOTESSurvey for extra-child-process package : NOTES
Survey for extra-child-process package : NOTESSubhajit Sahu
 
Dynamic Batch Parallel Algorithms for Updating PageRank : POSTER
Dynamic Batch Parallel Algorithms for Updating PageRank : POSTERDynamic Batch Parallel Algorithms for Updating PageRank : POSTER
Dynamic Batch Parallel Algorithms for Updating PageRank : POSTERSubhajit Sahu
 
Abstract for IPDPS 2022 PhD Forum on Dynamic Batch Parallel Algorithms for Up...
Abstract for IPDPS 2022 PhD Forum on Dynamic Batch Parallel Algorithms for Up...Abstract for IPDPS 2022 PhD Forum on Dynamic Batch Parallel Algorithms for Up...
Abstract for IPDPS 2022 PhD Forum on Dynamic Batch Parallel Algorithms for Up...Subhajit Sahu
 
Fast Incremental Community Detection on Dynamic Graphs : NOTES
Fast Incremental Community Detection on Dynamic Graphs : NOTESFast Incremental Community Detection on Dynamic Graphs : NOTES
Fast Incremental Community Detection on Dynamic Graphs : NOTESSubhajit Sahu
 
Can you fix farming by going back 8000 years : NOTES
Can you fix farming by going back 8000 years : NOTESCan you fix farming by going back 8000 years : NOTES
Can you fix farming by going back 8000 years : NOTESSubhajit Sahu
 
HITS algorithm : NOTES
HITS algorithm : NOTESHITS algorithm : NOTES
HITS algorithm : NOTESSubhajit Sahu
 
Basic Computer Architecture and the Case for GPUs : NOTES
Basic Computer Architecture and the Case for GPUs : NOTESBasic Computer Architecture and the Case for GPUs : NOTES
Basic Computer Architecture and the Case for GPUs : NOTESSubhajit Sahu
 
Dynamic Batch Parallel Algorithms for Updating Pagerank : SLIDES
Dynamic Batch Parallel Algorithms for Updating Pagerank : SLIDESDynamic Batch Parallel Algorithms for Updating Pagerank : SLIDES
Dynamic Batch Parallel Algorithms for Updating Pagerank : SLIDESSubhajit Sahu
 
Are Satellites Covered in Gold Foil : NOTES
Are Satellites Covered in Gold Foil : NOTESAre Satellites Covered in Gold Foil : NOTES
Are Satellites Covered in Gold Foil : NOTESSubhajit Sahu
 
Taxation for Traders < Markets and Taxation : NOTES
Taxation for Traders < Markets and Taxation : NOTESTaxation for Traders < Markets and Taxation : NOTES
Taxation for Traders < Markets and Taxation : NOTESSubhajit Sahu
 
A Generalization of the PageRank Algorithm : NOTES
A Generalization of the PageRank Algorithm : NOTESA Generalization of the PageRank Algorithm : NOTES
A Generalization of the PageRank Algorithm : NOTESSubhajit Sahu
 
ApproxBioWear: Approximating Additions for Efficient Biomedical Wearable Comp...
ApproxBioWear: Approximating Additions for Efficient Biomedical Wearable Comp...ApproxBioWear: Approximating Additions for Efficient Biomedical Wearable Comp...
ApproxBioWear: Approximating Additions for Efficient Biomedical Wearable Comp...Subhajit Sahu
 
Income Tax Calender 2021 (ITD) : NOTES
Income Tax Calender 2021 (ITD) : NOTESIncome Tax Calender 2021 (ITD) : NOTES
Income Tax Calender 2021 (ITD) : NOTESSubhajit Sahu
 
Youngistaan Foundation: Annual Report 2020-21 : NOTES
Youngistaan Foundation: Annual Report 2020-21 : NOTESYoungistaan Foundation: Annual Report 2020-21 : NOTES
Youngistaan Foundation: Annual Report 2020-21 : NOTESSubhajit Sahu
 

More from Subhajit Sahu (20)

DyGraph: A Dynamic Graph Generator and Benchmark Suite : NOTES
DyGraph: A Dynamic Graph Generator and Benchmark Suite : NOTESDyGraph: A Dynamic Graph Generator and Benchmark Suite : NOTES
DyGraph: A Dynamic Graph Generator and Benchmark Suite : NOTES
 
Shared memory Parallelism (NOTES)
Shared memory Parallelism (NOTES)Shared memory Parallelism (NOTES)
Shared memory Parallelism (NOTES)
 
A Dynamic Algorithm for Local Community Detection in Graphs : NOTES
A Dynamic Algorithm for Local Community Detection in Graphs : NOTESA Dynamic Algorithm for Local Community Detection in Graphs : NOTES
A Dynamic Algorithm for Local Community Detection in Graphs : NOTES
 
Scalable Static and Dynamic Community Detection Using Grappolo : NOTES
Scalable Static and Dynamic Community Detection Using Grappolo : NOTESScalable Static and Dynamic Community Detection Using Grappolo : NOTES
Scalable Static and Dynamic Community Detection Using Grappolo : NOTES
 
Application Areas of Community Detection: A Review : NOTES
Application Areas of Community Detection: A Review : NOTESApplication Areas of Community Detection: A Review : NOTES
Application Areas of Community Detection: A Review : NOTES
 
Community Detection on the GPU : NOTES
Community Detection on the GPU : NOTESCommunity Detection on the GPU : NOTES
Community Detection on the GPU : NOTES
 
Survey for extra-child-process package : NOTES
Survey for extra-child-process package : NOTESSurvey for extra-child-process package : NOTES
Survey for extra-child-process package : NOTES
 
Dynamic Batch Parallel Algorithms for Updating PageRank : POSTER
Dynamic Batch Parallel Algorithms for Updating PageRank : POSTERDynamic Batch Parallel Algorithms for Updating PageRank : POSTER
Dynamic Batch Parallel Algorithms for Updating PageRank : POSTER
 
Abstract for IPDPS 2022 PhD Forum on Dynamic Batch Parallel Algorithms for Up...
Abstract for IPDPS 2022 PhD Forum on Dynamic Batch Parallel Algorithms for Up...Abstract for IPDPS 2022 PhD Forum on Dynamic Batch Parallel Algorithms for Up...
Abstract for IPDPS 2022 PhD Forum on Dynamic Batch Parallel Algorithms for Up...
 
Fast Incremental Community Detection on Dynamic Graphs : NOTES
Fast Incremental Community Detection on Dynamic Graphs : NOTESFast Incremental Community Detection on Dynamic Graphs : NOTES
Fast Incremental Community Detection on Dynamic Graphs : NOTES
 
Can you fix farming by going back 8000 years : NOTES
Can you fix farming by going back 8000 years : NOTESCan you fix farming by going back 8000 years : NOTES
Can you fix farming by going back 8000 years : NOTES
 
HITS algorithm : NOTES
HITS algorithm : NOTESHITS algorithm : NOTES
HITS algorithm : NOTES
 
Basic Computer Architecture and the Case for GPUs : NOTES
Basic Computer Architecture and the Case for GPUs : NOTESBasic Computer Architecture and the Case for GPUs : NOTES
Basic Computer Architecture and the Case for GPUs : NOTES
 
Dynamic Batch Parallel Algorithms for Updating Pagerank : SLIDES
Dynamic Batch Parallel Algorithms for Updating Pagerank : SLIDESDynamic Batch Parallel Algorithms for Updating Pagerank : SLIDES
Dynamic Batch Parallel Algorithms for Updating Pagerank : SLIDES
 
Are Satellites Covered in Gold Foil : NOTES
Are Satellites Covered in Gold Foil : NOTESAre Satellites Covered in Gold Foil : NOTES
Are Satellites Covered in Gold Foil : NOTES
 
Taxation for Traders < Markets and Taxation : NOTES
Taxation for Traders < Markets and Taxation : NOTESTaxation for Traders < Markets and Taxation : NOTES
Taxation for Traders < Markets and Taxation : NOTES
 
A Generalization of the PageRank Algorithm : NOTES
A Generalization of the PageRank Algorithm : NOTESA Generalization of the PageRank Algorithm : NOTES
A Generalization of the PageRank Algorithm : NOTES
 
ApproxBioWear: Approximating Additions for Efficient Biomedical Wearable Comp...
ApproxBioWear: Approximating Additions for Efficient Biomedical Wearable Comp...ApproxBioWear: Approximating Additions for Efficient Biomedical Wearable Comp...
ApproxBioWear: Approximating Additions for Efficient Biomedical Wearable Comp...
 
Income Tax Calender 2021 (ITD) : NOTES
Income Tax Calender 2021 (ITD) : NOTESIncome Tax Calender 2021 (ITD) : NOTES
Income Tax Calender 2021 (ITD) : NOTES
 
Youngistaan Foundation: Annual Report 2020-21 : NOTES
Youngistaan Foundation: Annual Report 2020-21 : NOTESYoungistaan Foundation: Annual Report 2020-21 : NOTES
Youngistaan Foundation: Annual Report 2020-21 : NOTES
 

Recently uploaded

一比一原版(Otago毕业证书)奥塔哥理工学院毕业证成绩单学位证靠谱定制
一比一原版(Otago毕业证书)奥塔哥理工学院毕业证成绩单学位证靠谱定制一比一原版(Otago毕业证书)奥塔哥理工学院毕业证成绩单学位证靠谱定制
一比一原版(Otago毕业证书)奥塔哥理工学院毕业证成绩单学位证靠谱定制uodye
 
Vip Mumbai Call Girls Kalyan Call On 9920725232 With Body to body massage wit...
Vip Mumbai Call Girls Kalyan Call On 9920725232 With Body to body massage wit...Vip Mumbai Call Girls Kalyan Call On 9920725232 With Body to body massage wit...
Vip Mumbai Call Girls Kalyan Call On 9920725232 With Body to body massage wit...amitlee9823
 
Call Now ≽ 9953056974 ≼🔝 Call Girls In Yusuf Sarai ≼🔝 Delhi door step delevry≼🔝
Call Now ≽ 9953056974 ≼🔝 Call Girls In Yusuf Sarai ≼🔝 Delhi door step delevry≼🔝Call Now ≽ 9953056974 ≼🔝 Call Girls In Yusuf Sarai ≼🔝 Delhi door step delevry≼🔝
Call Now ≽ 9953056974 ≼🔝 Call Girls In Yusuf Sarai ≼🔝 Delhi door step delevry≼🔝9953056974 Low Rate Call Girls In Saket, Delhi NCR
 
怎样办理斯威本科技大学毕业证(SUT毕业证书)成绩单留信认证
怎样办理斯威本科技大学毕业证(SUT毕业证书)成绩单留信认证怎样办理斯威本科技大学毕业证(SUT毕业证书)成绩单留信认证
怎样办理斯威本科技大学毕业证(SUT毕业证书)成绩单留信认证tufbav
 
Just Call Vip call girls Bhiwandi Escorts ☎️9352988975 Two shot with one girl...
Just Call Vip call girls Bhiwandi Escorts ☎️9352988975 Two shot with one girl...Just Call Vip call girls Bhiwandi Escorts ☎️9352988975 Two shot with one girl...
Just Call Vip call girls Bhiwandi Escorts ☎️9352988975 Two shot with one girl...gajnagarg
 
VIP Call Girls Dharwad 7001035870 Whatsapp Number, 24/07 Booking
VIP Call Girls Dharwad 7001035870 Whatsapp Number, 24/07 BookingVIP Call Girls Dharwad 7001035870 Whatsapp Number, 24/07 Booking
VIP Call Girls Dharwad 7001035870 Whatsapp Number, 24/07 Bookingdharasingh5698
 
Abort pregnancy in research centre+966_505195917 abortion pills in Kuwait cyt...
Abort pregnancy in research centre+966_505195917 abortion pills in Kuwait cyt...Abort pregnancy in research centre+966_505195917 abortion pills in Kuwait cyt...
Abort pregnancy in research centre+966_505195917 abortion pills in Kuwait cyt...drmarathore
 
Call Girls Chickpet ☎ 7737669865☎ Book Your One night Stand (Bangalore)
Call Girls Chickpet ☎ 7737669865☎ Book Your One night Stand (Bangalore)Call Girls Chickpet ☎ 7737669865☎ Book Your One night Stand (Bangalore)
Call Girls Chickpet ☎ 7737669865☎ Book Your One night Stand (Bangalore)amitlee9823
 
Sector 18, Noida Call girls :8448380779 Model Escorts | 100% verified
Sector 18, Noida Call girls :8448380779 Model Escorts | 100% verifiedSector 18, Noida Call girls :8448380779 Model Escorts | 100% verified
Sector 18, Noida Call girls :8448380779 Model Escorts | 100% verifiedDelhi Call girls
 
➥🔝 7737669865 🔝▻ Deoghar Call-girls in Women Seeking Men 🔝Deoghar🔝 Escorts...
➥🔝 7737669865 🔝▻ Deoghar Call-girls in Women Seeking Men  🔝Deoghar🔝   Escorts...➥🔝 7737669865 🔝▻ Deoghar Call-girls in Women Seeking Men  🔝Deoghar🔝   Escorts...
➥🔝 7737669865 🔝▻ Deoghar Call-girls in Women Seeking Men 🔝Deoghar🔝 Escorts...amitlee9823
 
Just Call Vip call girls Shillong Escorts ☎️9352988975 Two shot with one girl...
Just Call Vip call girls Shillong Escorts ☎️9352988975 Two shot with one girl...Just Call Vip call girls Shillong Escorts ☎️9352988975 Two shot with one girl...
Just Call Vip call girls Shillong Escorts ☎️9352988975 Two shot with one girl...gajnagarg
 
Just Call Vip call girls Begusarai Escorts ☎️9352988975 Two shot with one gir...
Just Call Vip call girls Begusarai Escorts ☎️9352988975 Two shot with one gir...Just Call Vip call girls Begusarai Escorts ☎️9352988975 Two shot with one gir...
Just Call Vip call girls Begusarai Escorts ☎️9352988975 Two shot with one gir...gajnagarg
 
Get Premium Pimple Saudagar Call Girls (8005736733) 24x7 Rate 15999 with A/c ...
Get Premium Pimple Saudagar Call Girls (8005736733) 24x7 Rate 15999 with A/c ...Get Premium Pimple Saudagar Call Girls (8005736733) 24x7 Rate 15999 with A/c ...
Get Premium Pimple Saudagar Call Girls (8005736733) 24x7 Rate 15999 with A/c ...MOHANI PANDEY
 
Call Girls Chikhali Call Me 7737669865 Budget Friendly No Advance Booking
Call Girls Chikhali Call Me 7737669865 Budget Friendly No Advance BookingCall Girls Chikhali Call Me 7737669865 Budget Friendly No Advance Booking
Call Girls Chikhali Call Me 7737669865 Budget Friendly No Advance Bookingroncy bisnoi
 
SM-N975F esquematico completo - reparación.pdf
SM-N975F esquematico completo - reparación.pdfSM-N975F esquematico completo - reparación.pdf
SM-N975F esquematico completo - reparación.pdfStefanoBiamonte1
 
Hosa Road Call Girls Service: ☎ 7737669865 ☎ High Profile Model Escorts | Ban...
Hosa Road Call Girls Service: ☎ 7737669865 ☎ High Profile Model Escorts | Ban...Hosa Road Call Girls Service: ☎ 7737669865 ☎ High Profile Model Escorts | Ban...
Hosa Road Call Girls Service: ☎ 7737669865 ☎ High Profile Model Escorts | Ban...amitlee9823
 
Call Girls Banashankari Just Call 👗 7737669865 👗 Top Class Call Girl Service ...
Call Girls Banashankari Just Call 👗 7737669865 👗 Top Class Call Girl Service ...Call Girls Banashankari Just Call 👗 7737669865 👗 Top Class Call Girl Service ...
Call Girls Banashankari Just Call 👗 7737669865 👗 Top Class Call Girl Service ...amitlee9823
 
➥🔝 7737669865 🔝▻ Muzaffarpur Call-girls in Women Seeking Men 🔝Muzaffarpur🔝 ...
➥🔝 7737669865 🔝▻ Muzaffarpur Call-girls in Women Seeking Men  🔝Muzaffarpur🔝  ...➥🔝 7737669865 🔝▻ Muzaffarpur Call-girls in Women Seeking Men  🔝Muzaffarpur🔝  ...
➥🔝 7737669865 🔝▻ Muzaffarpur Call-girls in Women Seeking Men 🔝Muzaffarpur🔝 ...amitlee9823
 

Recently uploaded (20)

一比一原版(Otago毕业证书)奥塔哥理工学院毕业证成绩单学位证靠谱定制
一比一原版(Otago毕业证书)奥塔哥理工学院毕业证成绩单学位证靠谱定制一比一原版(Otago毕业证书)奥塔哥理工学院毕业证成绩单学位证靠谱定制
一比一原版(Otago毕业证书)奥塔哥理工学院毕业证成绩单学位证靠谱定制
 
Vip Mumbai Call Girls Kalyan Call On 9920725232 With Body to body massage wit...
Vip Mumbai Call Girls Kalyan Call On 9920725232 With Body to body massage wit...Vip Mumbai Call Girls Kalyan Call On 9920725232 With Body to body massage wit...
Vip Mumbai Call Girls Kalyan Call On 9920725232 With Body to body massage wit...
 
Call Now ≽ 9953056974 ≼🔝 Call Girls In Yusuf Sarai ≼🔝 Delhi door step delevry≼🔝
Call Now ≽ 9953056974 ≼🔝 Call Girls In Yusuf Sarai ≼🔝 Delhi door step delevry≼🔝Call Now ≽ 9953056974 ≼🔝 Call Girls In Yusuf Sarai ≼🔝 Delhi door step delevry≼🔝
Call Now ≽ 9953056974 ≼🔝 Call Girls In Yusuf Sarai ≼🔝 Delhi door step delevry≼🔝
 
怎样办理斯威本科技大学毕业证(SUT毕业证书)成绩单留信认证
怎样办理斯威本科技大学毕业证(SUT毕业证书)成绩单留信认证怎样办理斯威本科技大学毕业证(SUT毕业证书)成绩单留信认证
怎样办理斯威本科技大学毕业证(SUT毕业证书)成绩单留信认证
 
Abortion pills in Jeddah |+966572737505 | Get Cytotec
Abortion pills in Jeddah |+966572737505 | Get CytotecAbortion pills in Jeddah |+966572737505 | Get Cytotec
Abortion pills in Jeddah |+966572737505 | Get Cytotec
 
Just Call Vip call girls Bhiwandi Escorts ☎️9352988975 Two shot with one girl...
Just Call Vip call girls Bhiwandi Escorts ☎️9352988975 Two shot with one girl...Just Call Vip call girls Bhiwandi Escorts ☎️9352988975 Two shot with one girl...
Just Call Vip call girls Bhiwandi Escorts ☎️9352988975 Two shot with one girl...
 
VIP Call Girls Dharwad 7001035870 Whatsapp Number, 24/07 Booking
VIP Call Girls Dharwad 7001035870 Whatsapp Number, 24/07 BookingVIP Call Girls Dharwad 7001035870 Whatsapp Number, 24/07 Booking
VIP Call Girls Dharwad 7001035870 Whatsapp Number, 24/07 Booking
 
Abort pregnancy in research centre+966_505195917 abortion pills in Kuwait cyt...
Abort pregnancy in research centre+966_505195917 abortion pills in Kuwait cyt...Abort pregnancy in research centre+966_505195917 abortion pills in Kuwait cyt...
Abort pregnancy in research centre+966_505195917 abortion pills in Kuwait cyt...
 
Call Girls Chickpet ☎ 7737669865☎ Book Your One night Stand (Bangalore)
Call Girls Chickpet ☎ 7737669865☎ Book Your One night Stand (Bangalore)Call Girls Chickpet ☎ 7737669865☎ Book Your One night Stand (Bangalore)
Call Girls Chickpet ☎ 7737669865☎ Book Your One night Stand (Bangalore)
 
CHEAP Call Girls in Ashok Nagar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
CHEAP Call Girls in Ashok Nagar  (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICECHEAP Call Girls in Ashok Nagar  (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
CHEAP Call Girls in Ashok Nagar (-DELHI )🔝 9953056974🔝(=)/CALL GIRLS SERVICE
 
Sector 18, Noida Call girls :8448380779 Model Escorts | 100% verified
Sector 18, Noida Call girls :8448380779 Model Escorts | 100% verifiedSector 18, Noida Call girls :8448380779 Model Escorts | 100% verified
Sector 18, Noida Call girls :8448380779 Model Escorts | 100% verified
 
➥🔝 7737669865 🔝▻ Deoghar Call-girls in Women Seeking Men 🔝Deoghar🔝 Escorts...
➥🔝 7737669865 🔝▻ Deoghar Call-girls in Women Seeking Men  🔝Deoghar🔝   Escorts...➥🔝 7737669865 🔝▻ Deoghar Call-girls in Women Seeking Men  🔝Deoghar🔝   Escorts...
➥🔝 7737669865 🔝▻ Deoghar Call-girls in Women Seeking Men 🔝Deoghar🔝 Escorts...
 
Just Call Vip call girls Shillong Escorts ☎️9352988975 Two shot with one girl...
Just Call Vip call girls Shillong Escorts ☎️9352988975 Two shot with one girl...Just Call Vip call girls Shillong Escorts ☎️9352988975 Two shot with one girl...
Just Call Vip call girls Shillong Escorts ☎️9352988975 Two shot with one girl...
 
Just Call Vip call girls Begusarai Escorts ☎️9352988975 Two shot with one gir...
Just Call Vip call girls Begusarai Escorts ☎️9352988975 Two shot with one gir...Just Call Vip call girls Begusarai Escorts ☎️9352988975 Two shot with one gir...
Just Call Vip call girls Begusarai Escorts ☎️9352988975 Two shot with one gir...
 
Get Premium Pimple Saudagar Call Girls (8005736733) 24x7 Rate 15999 with A/c ...
Get Premium Pimple Saudagar Call Girls (8005736733) 24x7 Rate 15999 with A/c ...Get Premium Pimple Saudagar Call Girls (8005736733) 24x7 Rate 15999 with A/c ...
Get Premium Pimple Saudagar Call Girls (8005736733) 24x7 Rate 15999 with A/c ...
 
Call Girls Chikhali Call Me 7737669865 Budget Friendly No Advance Booking
Call Girls Chikhali Call Me 7737669865 Budget Friendly No Advance BookingCall Girls Chikhali Call Me 7737669865 Budget Friendly No Advance Booking
Call Girls Chikhali Call Me 7737669865 Budget Friendly No Advance Booking
 
SM-N975F esquematico completo - reparación.pdf
SM-N975F esquematico completo - reparación.pdfSM-N975F esquematico completo - reparación.pdf
SM-N975F esquematico completo - reparación.pdf
 
Hosa Road Call Girls Service: ☎ 7737669865 ☎ High Profile Model Escorts | Ban...
Hosa Road Call Girls Service: ☎ 7737669865 ☎ High Profile Model Escorts | Ban...Hosa Road Call Girls Service: ☎ 7737669865 ☎ High Profile Model Escorts | Ban...
Hosa Road Call Girls Service: ☎ 7737669865 ☎ High Profile Model Escorts | Ban...
 
Call Girls Banashankari Just Call 👗 7737669865 👗 Top Class Call Girl Service ...
Call Girls Banashankari Just Call 👗 7737669865 👗 Top Class Call Girl Service ...Call Girls Banashankari Just Call 👗 7737669865 👗 Top Class Call Girl Service ...
Call Girls Banashankari Just Call 👗 7737669865 👗 Top Class Call Girl Service ...
 
➥🔝 7737669865 🔝▻ Muzaffarpur Call-girls in Women Seeking Men 🔝Muzaffarpur🔝 ...
➥🔝 7737669865 🔝▻ Muzaffarpur Call-girls in Women Seeking Men  🔝Muzaffarpur🔝  ...➥🔝 7737669865 🔝▻ Muzaffarpur Call-girls in Women Seeking Men  🔝Muzaffarpur🔝  ...
➥🔝 7737669865 🔝▻ Muzaffarpur Call-girls in Women Seeking Men 🔝Muzaffarpur🔝 ...
 

CUDA First Programs: Computer Architecture CSE448 : UAA Alaska : Notes

  • 1. CUDA – First Programs “Hello, world” is traditionally the first program we write. We can do the same for CUDA. Here it is: In file hello.cu: #include "stdio.h" int main() { printf("Hello, worldn"); return 0; } On our Tesla machine, you can compile and this with: $ nvcc hello.cu $ ./a.out You can change the output file name with the –o flag: nvcc –o hello hello.cu If you edit your .bashrc file you can also add your current directory to your path if you don’t want to have to type the preceding . all of the time, which refers to the current working directory. Add export PATH=$PATH:. To the .bashrc file. Some would recommend not doing this for security purposes. You might consider this program to be cheating, since it doesn’t really use any CUDA functionality. Everything runs on the host. However, the point is that CUDA C programs can do everything a regular C program can do.
  • 2. Here is a slightly more interesting (but inefficient and only useful as an example) program that adds two numbers together using a kernel function: #include "stdio.h" __global__ void add(int a, int b, int *c) { *c = a + b; } int main() { int a,b,c; int *dev_c; a=3; b=4; cudaMalloc((void**)&dev_c, sizeof(int)); add<<<1,1>>>(a,b,dev_c); cudaMemcpy(&c, dev_c, sizeof(int), cudaMemcpyDeviceToHost); printf("%d + %d is %dn", a, b, c); cudaFree(dev_c); return 0; } To do in class: walk through program; show similar program in straight C and it runs much faster! Why? cudaMalloc returns cudaSuccess if it was successful; could check to ensure that the program will run correctly.
  • 3. Example: Summing Vectors This is a simple problem. Given two vectors (i.e. arrays), we would like to add them together in a third array. For example: A = {0, 2, 4, 6, 8} B = {1, 1, 2, 2, 1} Then A + B = C = {1, 3, 6, 8, 9} In this example the array is 5 elements long, so our approach will be to create 5 different threads. The first thread is responsible for computing C[0] = A[0] + B[0]. The second thread is responsible for computing C[1] = A[1] + B[1], and so forth. Here is how we can do this with traditional C code: #include "stdio.h" #define N 10 void add(int *a, int *b, int *c) { int tID = 0; while (tID < N) { c[tID] = a[tID] + b[tID]; tID += 1; } } int main() { int a[N], b[N], c[N]; // Fill Arrays for (int i = 0; i < N; i++) { a[i] = i, b[i] = 1; } add (a, b, c); for (int i = 0; i < N; i++) { printf("%d + %d = %dn", a[i], b[i], c[i]); } return 0; }
  • 4. This is a rather roundabout way to add two arrays – our reason is because this will translate a little nicer to the CUDA version. To compile and run it, we have to use g++ (since it uses some C++ style notations that don’t work in C). Here is the CUDA version: #include "stdio.h" #define N 10 __global__ void add(int *a, int *b, int *c) { int tID = blockIdx.x; if (tID < N) { c[tID] = a[tID] + b[tID]; } } int main() { int a[N], b[N], c[N]; int *dev_a, *dev_b, *dev_c; cudaMalloc((void **) &dev_a, N*sizeof(int)); cudaMalloc((void **) &dev_b, N*sizeof(int)); cudaMalloc((void **) &dev_c, N*sizeof(int)); // Fill Arrays for (int i = 0; i < N; i++) { a[i] = i, b[i] = 1; } cudaMemcpy(dev_a, a, N*sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(dev_b, b, N*sizeof(int), cudaMemcpyHostToDevice); add<<<N,1>>>(dev_a, dev_b, dev_c); cudaMemcpy(c, dev_c, N*sizeof(int), cudaMemcpyDeviceToHost); for (int i = 0; i < N; i++) { printf("%d + %d = %dn", a[i], b[i], c[i]); } return 0; } blockIDx.x gives us the Block ID, which ranges from 0 to N-1. What if we used add<<<1,N>>> instead? Then we can access by the ThreadID which is the variable threadIDx.x.
  • 5. As another example, let’s add two 2D arrays. We can define a 2D array of ints as follows: int c[2][3]; The following code illustrates how the 2D array is laid out in memory: for (int i=0; i < 2; i++) for (int j=0; j< 3; j++) printf("[%d][%d] at %ldn",i,j,&c[i][j]); Output: [0][0] at 140733933298160 [0][1] at 140733933298164 [0][2] at 140733933298168 [1][0] at 140733933298172 [1][1] at 140733933298176 [1][2] at 140733933298180 We can see that we have a layout where the next cell in the j dimension occupies the next sequential integer in memory, where an int is 4 bytes: c[0][0] at &c c[0][1] at &c + 4 c[0][2] at &c + 8 c[1][0] at &c + 12 c[1][1] at &c + 16 c[1][2] at &c + 20 In general, the address of a cell can be computed by: &c + [(sizeof(int) * sizeof-j-dimension * i] + (sizeof(int)) * j In our example the size of the j dimension is 3. For example, the cell at c[1][1] would be combined as the base address + (4*3*1) + (4*1) = &c+16. C will do the addressing for us if we use the array notation, so if INDEX=i*WIDTH + J then we can access the element via: c[INDEX] CUDA requires we allocate memory as a one-dimensional array, so we can use the mapping above to a 2D array. To make the mapping a little easier in the kernel function we can declare the blocks to be in a grid that is the same dimensions as the 2D array. This will create variables blockIdx.x and blockIdx.y that correspond to the width and height of the array.
  • 6. #include "stdio.h" #define COLUMNS 3 #define ROWS 2 __global__ void add(int *a, int *b, int *c) { int x = blockIdx.x; int y = blockIdx.y; int i = (COLUMNS*y) + x; c[i] = a[i] + b[i]; } int main() { int a[ROWS][COLUMNS], b[ROWS][COLUMNS], c[ROWS][COLUMNS]; int *dev_a, *dev_b, *dev_c; cudaMalloc((void **) &dev_a, ROWS*COLUMNS*sizeof(int)); cudaMalloc((void **) &dev_b, ROWS*COLUMNS*sizeof(int)); cudaMalloc((void **) &dev_c, ROWS*COLUMNS*sizeof(int)); for (int y = 0; y < ROWS; y++) // Fill Arrays for (int x = 0; x < COLUMNS; x++) { a[y][x] = x; b[y][x] = y; } cudaMemcpy(dev_a, a, ROWS*COLUMNS*sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(dev_b, b, ROWS*COLUMNS*sizeof(int), cudaMemcpyHostToDevice); dim3 grid(COLUMNS,ROWS); add<<<grid,1>>>(dev_a, dev_b, dev_c); cudaMemcpy(c, dev_c, ROWS*COLUMNS*sizeof(int), cudaMemcpyDeviceToHost); for (int y = 0; y < ROWS; y++) // Output Arrays { for (int x = 0; x < COLUMNS; x++) { printf("[%d][%d]=%d ",y,x,c[y][x]); } printf("n"); } return 0; }