Your SlideShare is downloading. ×
0
USING	
  OPENGL	
  AND	
  DIRECTX	
  FOR	
  
HETEROGENEOUS	
  COMPUTE	
  
KARL	
  HILLESLAND	
  
AGENDA	
  

THE	
  GRAPHICS	
  PIPELINE	
  

PROGRAMMING	
  THE	
  GPU	
  

FEEDING	
  THE	
  GPU	
  

2	
   |	
  	
  	
  ...
The	
  Graphics	
  
Pipeline	
  
GRAPHICS	
  PIPELINE	
  
SHADER	
  CENTRIC	
  

OpenGL	
  

DirectX	
  

	
  
Vertex	
  Shader	
  

	
  

Vertex	
  Shader...
GRAPHICS	
  PIPELINE	
  
MORE	
  DETAILS	
  

indices,	
  
vertices	
  

	
  

Input	
  Assembler

Thread	
  per	
  DS	
  v...
prim	
  

Hi-Z/Stencil	
  info	
  

	
  

	
  

Rasterizer	
  2

	
  

Unroller

Rasterizer	
  1

Hi-Z/Stencil

Unroll...
WHAT’S	
  THE	
  POINT?	
  
!  The	
  Graphics	
  pipeline	
  has	
  a	
  lot	
  more	
  parts	
  
‒  Reorganizes	
  threa...
GRAPHICS	
  IN	
  THE	
  NINETIES	
  	
  

	
  

Input	
  Assembler

	
  

Transform	
  and	
  Lighting

	
  

Rasterizer

...
VORONOI	
  DIAGRAMS	
  
GPGPU	
  WITHOUT	
  SHADERS	
  

!  Color	
  according	
  to	
  closest	
  
‒  Point	
  
‒  Line	
...
VORONOI	
  DIAGRAMS	
  IN	
  THE	
  NINETIES	
  

Simply	
  rasterize	
  the	
  
cones	
  using	
  graphics	
  
hardware	
...
OPENGL	
  1	
  SIMD	
  MACHINE	
  
PEERCY,	
  ET.	
  AL.	
  SIGGRAPH	
  2000	
  

SIMD	
  Concept	
  

OpenGL	
  1	
  SIMD...
USING	
  EARLY-Z	
  OR	
  STENCIL	
  

Texture-space	
  blur	
  

With	
  back-face	
  culling	
  

Applications	
  o...
What’s	
  the	
  Point?	
  

The	
  graphics	
  pipeline	
  	
  
gives	
  you	
  access	
  to	
  more	
  

13	
   |	
  	
 ...
Programming	
  
the	
  GPU	
  
SHADER	
  TYPES	
  
!  Compute	
  (4.3)	
  
!  Vertex	
  (2,	
  ES	
  2)	
  
!  Tessellation	
  Control	
  (4)	
  
!  Tesse...
BASIC	
  GLSL	
  VERTEX	
  SHADER	
  
#version 430
in vec3 Position;
in vec2 UV;
out PosUV //Not available in GLES
{
vec3 ...
BASIC	
  GLSL	
  PIXEL	
  SHADER	
  
in fsInput //Not available in GLES
{
vec3 vPositionWS;
vec2 vUV;
} fs_input;
uniform ...
BASIC	
  HLSL	
  VERTEX	
  SHADER	
  
struct PosUV //Not available in GLES
{
float4 vPositionSS : SV_POSITION;
float3 vPos...
BASIC	
  HLSL	
  PIXEL	
  SHADER	
  
struct fsInput
{
float3 vPositionWS : POSITION;
float2 vUV : TEXCOORD0;
};
sampler sW...
BASIC	
  GEOMETRY	
  SHADER	
  
layout (triangles) in;
layout (triangle_strip, max_vertices = 3) out;
void main(void)
{
fo...
TESSELLATION	
  
Tessellation	
  Control	
  

Hull	
  Shader	
  

Patch	
  Constant	
  Func	
  

Tess	
  factors	
  
Tess	
...
TESSELLATION	
  
// Tessellation Control
layout (vertices = 4) out;
void TCS(void)
{
if (gl_InvocationID == 0)
{
gl_TessLe...
TESSELLATION	
  CONTROL	
  
out patch float tessFactor;
void main(void)
{
if (gl_InvocationID == 0)

Tessellation	
  rate	
...
COMPUTE	
  SHADERS	
  
Thread Group

Thread

Thread

group size y

Thread

global size y

global size x

Thread

group siz...
OPENGL	
  COMPUTE	
  
buffer BlockName { int linearOutput[] };
shared int var;
layout(local_size_x = 64, local_size_y = 1,...
DIRECT	
  COMPUTE	
  
RWStructuredBuffer<int> linearOutput;
groupshared int var;
[numthreads(64, 1, 1)]
void ContrivedSamp...
PROGRAMMING	
  THE	
  GPU	
  
SYNCHRONIZATION	
  
MEMORY	
  COHERENCE-	
  GL	
  /	
  DX	
  
Dispatch	
  

CS	
  

28	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
...
MEMORY	
  COHERENCE-	
  GL/DX	
  11.1	
  
Draw	
  

VS	
  

Mem	
  

GS	
  

VS	
  
GS	
  

FS	
  
FS	
  

RT	
  

29	
 ...
MEMORY	
  COHERENCE-	
  GL	
  /	
  DX	
  11.1	
  

Draw	
  
VS	
  

Mem	
  

GS	
  

FS	
  
RT	
  

30	
   |	
  	
  	
  ...
Feeding	
  the	
  
GPU	
  
DRIVER	
  STACKS	
  (WINDOWS)	
  

	
  OpenGL	
  App	
  

DirectX	
  App	
  

OpenGL32.dll	
  

D3D11.dll	
  

D3D	
  UMD	...
DRIVER	
  STACKS	
  (LINUX)	
  

App	
  
libGL	
  

Gallium3D	
  
State	
  tracker	
  
DRI	
  

Or	
  

Hardware	
  layer	...
FEEDING	
  THE	
  GPU	
  
GPU-CPU	
  SYNCHRONIZATION	
  
DRIVER	
  COMMAND	
  QUEUE	
  
Application	
  
Dr	
  
5	
  

Da	
  
5	
  

Da	
  1	
  

Dr	
  1	
  

Da	
  	
  
6	
  

Da	
...
CPU/GPU	
  MEMORY	
  SYNCHRONIZATION	
  
BY	
  DRIVER	
  

App	
  
Memory	
  

Driver	
  
Copy	
  

App	
  
Memory	
  

Dr...
CPU/GPU	
  MEMORY	
  SYNCHRONIZATION	
  
MANUAL	
  

App	
  
Memory	
  

Da	
  1	
  

Dr	
  1	
  

Driver	
  
Copy	
  

Ap...
FEEDING	
  THE	
  GPU	
  
DATA	
  
LEGACY	
  OPENGL	
  OBJECT	
  MODEL	
  
!  glGenBuffers,	
  glGenTextures,	
  glGenSamplers,	
  …	
  
‒  Creates	
  name	
 ...
BUFFER	
  BINDING	
  AND	
  CREATION	
  

glBindBuffer(target,name)	
  

Target	
  

binding	
  

BufferObject	
  

State,	
...
SETTING	
  DATA	
  (SIMPLEST	
  OPTION)	
  

glBufferData	
  
(target,	
  size,	
  pData,	
  usage)	
  

data	
  

Target	
...
BUFFER	
  TARGETS	
  
GL	
  Name	
  

Typical	
  Purpose	
  	
  

DX	
  Equivalent	
  

ARRAY	
  

Vertices	
  

VERTEX	
  ...
DIRECTX	
  OBJECTS	
  AND	
  VIEWS	
  
!  Resource	
  (base	
  class)	
  
‒  Usage:	
  default,	
  immutable,	
  dynamic,	...
OBJECT	
  AND	
  VIEW	
  EXAMPLE	
  
D3D11_BUFFER_DESC desc;
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
…
pDevice->Creat...
DATA	
  TYPES	
  
Image	
  

45	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	...
IMMUTABLE	
  TEXTURES	
  (4.2,	
  GLES	
  3)	
  
glGenTextures(1, &texObjName);
glBindTexture(GL_TEXTURE_2D_ARRAY,
texObjN...
FEEDING	
  THE	
  GPU	
  
PROGRAMS	
  
SHADER	
  MANAGEMENT	
  -	
  OPENGL	
  
Program	
  Object	
  
GLuint shader = glCreateShader(GL_VERTEX_SHADER);

Vertex	...
BASIC	
  GLSL	
  PIXEL	
  SHADER	
  
in fsInput //Not available in GLES
{
vec3 vPositionWS;
vec2 vUV;
} fs_input;
uniform ...
BASIC	
  GLSL	
  VERTEX	
  SHADER	
  
#version 430
in vec3 Position;
in vec2 UV;
out PosUV //Not available in GLES
{
vec3 ...
SHADER	
  MANAGEMENT	
  -	
  DX	
  
D3DCompile(source,..,vs_5_0,..,&pByteCode)
pShader = CreateVertexShader(pByteCode);
...
PROGRAM	
  BINARIES	
  
OpenGL	
  
glGetProgramBinary(program,…,format,pBinaryOut);

DirectX	
  
D3DCompile(source,..,vs_5...
DRAW	
  CALLS	
  
OpenGL	
  

D3D	
  

glDrawArrays	
  

Draw	
  

glDrawArraysInstanced	
  

DrawInstanced(…,0)	
  

glDr...
COMPUTE	
  SHADERS	
  
glDispatchCompute(nGroupsX,nGroupsY,nGroupsZ)
	
  

Dispatch(nGroupsX,nGroupsY,nGroupsZ
)

	
  

gl...
Wrap	
  up	
  
IMAGE-BASED	
  MODELING	
  

56	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
 ...
GENERATING	
  THE	
  MODEL	
  
Render:	
  projection,	
  
rasterization,	
  
texturing,	
  depth	
  
buffering,	
  …	
  

57	...
TressFX	
  
HAIR	
  

!  AMD	
  technology	
  for	
  high-quality	
  hair	
  rendering	
  
!  Thousands	
  of	
  hair	
 ...
NOT	
  EXPOSED	
  IN	
  GRAPHICS	
  APIS	
  (YET)	
  
!  Local	
  shared	
  memory	
  restricted	
  to	
  
‒  Compute	
  	...
SUMMARY	
  

The	
  graphics	
  pipeline	
  	
  
gives	
  you	
  access	
  to	
  different	
  hardware	
  

There	
  are	
 ...
DISCLAIMER	
  &	
  ATTRIBUTION	
  
The	
  information	
  presented	
  in	
  this	
  document	
  is	
  for	
  informational	
...
Upcoming SlideShare
Loading in...5
×

PG-4034, Using OpenGL and DirectX for Heterogeneous Compute, by Karl Hillesland

5,259

Published on

Presentation PG-4034, Using OpenGL and DirectX for Heterogeneous Compute, by Karl Hillesland at the AMD Developer Summit (APU13) November 11-13, 2013

Published in: Technology, Art & Photos
0 Comments
1 Like
Statistics
Notes
  • Be the first to comment

No Downloads
Views
Total Views
5,259
On Slideshare
0
From Embeds
0
Number of Embeds
0
Actions
Shares
0
Downloads
35
Comments
0
Likes
1
Embeds 0
No embeds

No notes for slide

Transcript of "PG-4034, Using OpenGL and DirectX for Heterogeneous Compute, by Karl Hillesland"

  1. 1. USING  OPENGL  AND  DIRECTX  FOR   HETEROGENEOUS  COMPUTE   KARL  HILLESLAND  
  2. 2. AGENDA   THE  GRAPHICS  PIPELINE   PROGRAMMING  THE  GPU   FEEDING  THE  GPU   2   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  3. 3. The  Graphics   Pipeline  
  4. 4. GRAPHICS  PIPELINE   SHADER  CENTRIC   OpenGL   DirectX     Vertex  Shader     Vertex  Shader   Tessellation  Control  Shader   Tessellation  Evaluation  Shader   Geometry  Shader   Rasterizer   Fragment  Shader   Per-Fragment  Operations   Tessellation  Primitive  Generator 4   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL     Input  Assembler Vertex  Puller   Tessellator   Hull  Shader   Domain  Shader   Geometry  Shader   Rasterizer   Pixel  Shader   Output  Merger
  5. 5. GRAPHICS  PIPELINE   MORE  DETAILS   indices,   vertices     Input  Assembler Thread  per  DS  vertex  (n3)   Barycentric   Domain  Shader   DS  vertex   Collects  prims   vertex       Tessellator Patch  verts  n2       Prim  verts   Geometry  Shader 5   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Prims   Collects  Patches Patch  verts  n1   Tess   factors   Collects  patches   vertex   Vertex  Shader Thread   per  vertex     Patch  Constant   Hull  Shader Control  point     Primitive  Assembler Thread  per  output   control  point  n2     Next  Slide
  6. 6. prim   Hi-Z/Stencil  info       Rasterizer  2   Unroller Rasterizer  1 Hi-Z/Stencil Unrolling,   Masking   Pixel  Shader     Reordering Depth/Stencil         Blending Not  shown:  Any  shader  stage  can  read/write  to  memory,   including  atomics,  filtering*,  decompression,  and  sRGB   conversion     Collects  Quads Conversion 6   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Early-Z/Stencil  
  7. 7. WHAT’S  THE  POINT?   !  The  Graphics  pipeline  has  a  lot  more  parts   ‒  Reorganizes  threads   ‒  Tracks  dependencies   ‒  Reorders   ‒  Extra  fixed-function  units   !  Are  they  usable?   7   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  8. 8. GRAPHICS  IN  THE  NINETIES       Input  Assembler   Transform  and  Lighting   Rasterizer   Texturing  and  Fog   Output  Merger 8   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  9. 9. VORONOI  DIAGRAMS   GPGPU  WITHOUT  SHADERS   !  Color  according  to  closest   ‒  Point   ‒  Line   !  Could  be  weighted   !  Useful  for     ‒  Collision  Detection   ‒  Surface  Reconstruction   ‒  Robot  Motion  Planning   ‒  Non-Photorealistic  Rendering   ‒  Surface  Simplification   ‒  Mesh  Generation   9   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  10. 10. VORONOI  DIAGRAMS  IN  THE  NINETIES   Simply  rasterize  the   cones  using  graphics   hardware   Haeberli90,  Woo97   10   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   2-part  discrete  Voronoi   diagram  representation   Color  Buffer   Site  IDs   Depth  Buffer   Distance  
  11. 11. OPENGL  1  SIMD  MACHINE   PEERCY,  ET  AL.  SIGGRAPH  2000   SIMD  Concept   OpenGL  1  SIMD   Instruction   OpenGL  call  (CPU)   SIMD  Lane   Pixel   SIMD  Lane  Input  Data   Texel   SIMD  Lane  Output  Data   Fragment   ALU   Blend  Operation   Conditionals   Alpha  and  Stencil  Tests   11   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   float y; float4 contrived_example() { float x = f(u,v) if( x*y > 0) { x = x + g(u,v) } return x*h(u,v); }
  12. 12. USING  EARLY-Z  OR  STENCIL   Texture-space  blur   With  back-face  culling   Applications  of  Explicit  Early-Z  Culling,  Real-Time  Shading  Course,  Siggraph  2004.   12   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Pressure  buffer   used  for  sim  culling  
  13. 13. What’s  the  Point?   The  graphics  pipeline     gives  you  access  to  more   13   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  14. 14. Programming   the  GPU  
  15. 15. SHADER  TYPES   !  Compute  (4.3)   !  Vertex  (2,  ES  2)   !  Tessellation  Control  (4)   !  Tessellation  Evaluation  (4)     !  Geometry  (3)   !  Fragment  (2,  ES  2)   OpenGL   15   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   !  !  !  !  !  !  Compute  (11)   Vertex  (8)   Hull  (11)   Domain  (11)   Geometry  (10)   Pixel  (9)   D3D   15  
  16. 16. BASIC  GLSL  VERTEX  SHADER   #version 430 in vec3 Position; in vec2 UV; out PosUV //Not available in GLES { vec3 vPositionWS; vec2 vUV; } vs_output; uniform mat4x4 mMVP; uniform mat4x4 mM; void main(void) { gl_Position = mMVP * vec4(Position, 1.0); vs_output.vPositionWS = mM * vec4(Position, 1.0); vs_output.vUV = UV; } 16   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   16  
  17. 17. BASIC  GLSL  PIXEL  SHADER   in fsInput //Not available in GLES { vec3 vPositionWS; vec2 vUV; } fs_input; uniform sampler2D sDiffuse; out vec4 color_out; void main(void) { color_out = texture( sDiffuse, fs_input.vUV ); } 17   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   17  
  18. 18. BASIC  HLSL  VERTEX  SHADER   struct PosUV //Not available in GLES { float4 vPositionSS : SV_POSITION; float3 vPositionWS : POSITION; float2 vUV : TEXCOORD0; }; float4x4 mMVP; float4x4 mM; PosUV main( float3 Position : POSITION, float2 UV: TEXCOORD0) { PosUV vs_output; output.vPositionSS = mMVP * float4(Position, 1.0); vs_output.vPositionWS = mMP * float4(Position, 1.0); vs_output.vUV = UV; return vs_output; } 18   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   18  
  19. 19. BASIC  HLSL  PIXEL  SHADER   struct fsInput { float3 vPositionWS : POSITION; float2 vUV : TEXCOORD0; }; sampler sWrapTriLin; texture2D <float4> tDiffuse; float4 main(fsInput i) : SV_TARGET { return tDiffuse.Sample(sWrapTriLin, i.vUV); } 19   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   19  
  20. 20. BASIC  GEOMETRY  SHADER   layout (triangles) in; layout (triangle_strip, max_vertices = 3) out; void main(void) { for(int i=0; i < gl_in.length(); i++) { gl_Position = gl_in[i].gl_Position; EmitVertex(); } EndPrimitive(); } 20   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   20  
  21. 21. TESSELLATION   Tessellation  Control   Hull  Shader   Patch  Constant  Func   Tess  factors   Tess  factors   Tessellator   Tessellator   Topology   Topology   Tessellation   Evaluation   OpenGL  4.0   21   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Domain   Shader   D3D11   21  
  22. 22. TESSELLATION   // Tessellation Control layout (vertices = 4) out; void TCS(void) { if (gl_InvocationID == 0) { gl_TessLevelInner[0] = 2.0; … // Hull Shader [outputcontrolpoints(4)] [patchconstantfunc("ConstantsHS")] [domain("quad")] [partitioning(“integer")] [outputtopology("triangle_cw")] // Tessellation Evaluation layout (quads, cw, equal_spacing) in void TES(void) { … HS_OUTPUT HullShader(…) // Domain Shader DS_OUTPUT DomainShader(…) OpenGL  4.0   22   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   D3D11   22  
  23. 23. TESSELLATION  CONTROL   out patch float tessFactor; void main(void) { if (gl_InvocationID == 0) Tessellation  rate  can  be  set  by  any   instance   { gl_TessLevelInner[0] = 2.0; … tessFactor = 2.0; } Values  can  be   communicated  across   threads   barrier(); DoSomeWork(tessFactor, gl_InvocationID); 23   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   23  
  24. 24. COMPUTE  SHADERS   Thread Group Thread Thread group size y Thread global size y global size x Thread group size x !  Groups  can  share  local  memory   !  Threads  can  be  synced  at  a  group  level   24   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   24  
  25. 25. OPENGL  COMPUTE   buffer BlockName { int linearOutput[] }; shared int var; layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) void ContrivedSample() { const uvec3 localIdx = gl_LocalInvocationID; const uvec3 globalIdx = gl_GlobalInvocationID; const uvec3 groupIdx = gl_WorkGroupID; if(localId.x == 0) var = groupIdx.x; barrier(); linearOutput[globalIdx.x] = var; } 25   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   25  
  26. 26. DIRECT  COMPUTE   RWStructuredBuffer<int> linearOutput; groupshared int var; [numthreads(64, 1, 1)] void ContrivedSample( uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID ) { if(localIdx.x == 0) var = groupIdx.x; GroupMemoryBarrierWithGroupSync(); linearOutput[globalIdx.x] = var; } 26   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   26  
  27. 27. PROGRAMMING  THE  GPU   SYNCHRONIZATION  
  28. 28. MEMORY  COHERENCE-  GL  /  DX   Dispatch   CS   28   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Mem   CS   28  
  29. 29. MEMORY  COHERENCE-  GL/DX  11.1   Draw   VS   Mem   GS   VS   GS   FS   FS   RT   29   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   29  
  30. 30. MEMORY  COHERENCE-  GL  /  DX  11.1   Draw   VS   Mem   GS   FS   RT   30   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   30  
  31. 31. Feeding  the   GPU  
  32. 32. DRIVER  STACKS  (WINDOWS)    OpenGL  App   DirectX  App   OpenGL32.dll   D3D11.dll   D3D  UMD   OpenGL  ICD   DXGI   KMD   32   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   32  
  33. 33. DRIVER  STACKS  (LINUX)   App   libGL   Gallium3D   State  tracker   DRI   Or   Hardware  layer   Gallium3D   WinSys   libDRM-radeon   drm   33   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   33  
  34. 34. FEEDING  THE  GPU   GPU-CPU  SYNCHRONIZATION  
  35. 35. DRIVER  COMMAND  QUEUE   Application   Dr   5   Da   5   Da  1   Dr  1   Da     6   Da  2   Dr   6   Dr  2   Da  3   Dr  3   Da  4   Dr  4   Da  5   Dr  5   Da  6   Dr  6   Driver/GPU   Time   Reorder  possible?   35   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   35  
  36. 36. CPU/GPU  MEMORY  SYNCHRONIZATION   BY  DRIVER   App   Memory   Driver   Copy   App   Memory   Driver   Copy   Hints   36   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   GPU   Read   Driver   Copy   GPU   Read   Stream,  Static,  Dynamic   Draw,  Read,  Copy  
  37. 37. CPU/GPU  MEMORY  SYNCHRONIZATION   MANUAL   App   Memory   Da  1   Dr  1   Driver   Copy   App  Copy   Da  2   Dr  2   Fence   37   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Da  3   Dr  3   Da  4   Dr  4   Da  5   GPU   Read   Dr  5   Da  6   Dr  6  
  38. 38. FEEDING  THE  GPU   DATA  
  39. 39. LEGACY  OPENGL  OBJECT  MODEL   !  glGenBuffers,  glGenTextures,  glGenSamplers,  …   ‒  Creates  name  /  handle   !  glBindBuffer,  glBindTexture,     ‒  Sets  as  current   !  glBufferData,  glTexSubImage,  glMapBuffer   ‒  Supplies  data   39   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   39  
  40. 40. BUFFER  BINDING  AND  CREATION   glBindBuffer(target,name)   Target   binding   BufferObject   State,  Usage   BufferData   desc.BindFlags  =  <Target>   pDevice->CreateBuffer(desc,…)   40   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   40  
  41. 41. SETTING  DATA  (SIMPLEST  OPTION)   glBufferData   (target,  size,  pData,  usage)   data   Target   binding   BufferObject   desc.Usage  =  <Usage>   desc.CPUAccessFlags  =  <RWUsage>   pDevice->CreateBuffer(desc,pData,)   41   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   41  
  42. 42. BUFFER  TARGETS   GL  Name   Typical  Purpose     DX  Equivalent   ARRAY   Vertices   VERTEX   ELEMENT_ARRAY   Indices   INDEX   UNIFORM   Read-only  vars   CONSTANT   TEXTURE_BUFFER   Buffer-as-texture   CONSTANT  (tbuffer)   SHADER_STORAGE   Read/write   SHADER_RESOURCE   TRANSFORM_FEEDBACK   Stream  out   Stream  out   DRAW_INDIRECT   indirect  draw   DRAWINDIRECT   ATOMIC_COUNTER   Global  counter  var   UAV_FLAG_COUNTER   COPY_READ,  _WRITE   Copying  (optional)   Staging?   PIXEL_PACK,  _UNPACK   GPU  <->  CPU   Staging?   42   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   42  
  43. 43. DIRECTX  OBJECTS  AND  VIEWS   !  Resource  (base  class)   ‒  Usage:  default,  immutable,  dynamic,  staging   ‒  Bind  flags:  vertex,  index,  shader  resource,  …   !  Buffer   !  Texture2D,  …   !  DepthStencilView   !  RenderTargetView   !  ShaderResourceView   !  UnorderedAccessView   43   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   43  
  44. 44. OBJECT  AND  VIEW  EXAMPLE   D3D11_BUFFER_DESC desc; desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; … pDevice->CreateBuffer(&desc, data, &pBuffer); D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc; srcDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; … pDevice->CreateShaderResourceView(pBuffer, &srvDesc, &pView); //at draw time pContext->VSSetShaderResources(0, 1, pView); 44   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   44  
  45. 45. DATA  TYPES   Image   45   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Linear  
  46. 46. IMMUTABLE  TEXTURES  (4.2,  GLES  3)   glGenTextures(1, &texObjName); glBindTexture(GL_TEXTURE_2D_ARRAY, texObjName); glTexStorage3D(GL_TEXTURE_2D_ARRAY, level, internalformat, width, height, depth); glTexSubImage3D(GL_TEXTURE_2D_ARRAY, 0,0,0, width, height, depth, format, type, pData);   CreateTexture2D( desc, srcDataLayout, pData); 46   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   46  
  47. 47. FEEDING  THE  GPU   PROGRAMS  
  48. 48. SHADER  MANAGEMENT  -  OPENGL   Program  Object   GLuint shader = glCreateShader(GL_VERTEX_SHADER); Vertex  Shader   glShaderSource(…); glCompileShader(); Pixel  Shader   GLuint program = glCreateProgram(); glAttachShader(program, shader); glLinkProgram(program); glUseProgram(program);       48   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   48  
  49. 49. BASIC  GLSL  PIXEL  SHADER   in fsInput //Not available in GLES { vec3 vPositionWS; vec2 vUV; } fs_input; uniform sampler2D sDiffuse; out vec4 color_out; void main(void) { color_out = texture( sDiffuse, fs_input.vUV ); } 49   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   49  
  50. 50. BASIC  GLSL  VERTEX  SHADER   #version 430 in vec3 Position; in vec2 UV; out PosUV //Not available in GLES { vec3 vPositionWS; vec2 vUV; } vs_output; uniform mat4x4 mMVP; uniform mat4x4 mM; void main(void) { gl_Position = mMVP * vec4(Position, 1.0); vs_output.vPositionWS = mM * vec4(Position, 1.0); vs_output.vUV = UV; } 50   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   50  
  51. 51. SHADER  MANAGEMENT  -  DX   D3DCompile(source,..,vs_5_0,..,&pByteCode) pShader = CreateVertexShader(pByteCode); VSSetShader(pShader,0,0); !  No  program  /  link  concept  in  API   51   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   51  
  52. 52. PROGRAM  BINARIES   OpenGL   glGetProgramBinary(program,…,format,pBinaryOut); DirectX   D3DCompile(source,..,vs_5_0,..,&pByteCode)   !  Program  level   !  Shader  level   !  In  theory:  format  choices   !  Portable  byte  code   !  In  practice:  somewhat  final,  non-portable   52   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   52  
  53. 53. DRAW  CALLS   OpenGL   D3D   glDrawArrays   Draw   glDrawArraysInstanced   DrawInstanced(…,0)   glDrawArraysInstancedBaseInstance   DrawInstanced   glDrawArraysIndirect   DrawInstancedIndirect   glMultiDrawArrays   for(int  i=0;  i<n;  ++i)        Draw(count[i],  start[i]);   glMultiDrawArraysIndirect   for(int  i=0;  i<n;  ++i)        DrawInstancedIndirect(…)   glDrawElements   DrawIndexed   …And  so  forth   53   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   53  
  54. 54. COMPUTE  SHADERS   glDispatchCompute(nGroupsX,nGroupsY,nGroupsZ)   Dispatch(nGroupsX,nGroupsY,nGroupsZ )   glDispatchComputeIndirect(offset)   DispatchIndirect(pResource,offset)   OpenGL  4.3   54   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   D3D11   54  
  55. 55. Wrap  up  
  56. 56. IMAGE-BASED  MODELING   56   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  57. 57. GENERATING  THE  MODEL   Render:  projection,   rasterization,   texturing,  depth   buffering,  …   57   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  58. 58. TressFX   HAIR   !  AMD  technology  for  high-quality  hair  rendering   !  Thousands  of  hair  strands  individually  simulated  and   rendered  on  the  GPU   !  DirectCompute  physics  simulation   !  Shader  Model  5.0  pixel  shader  using  compute  capabilities  for   rendering   58   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  59. 59. NOT  EXPOSED  IN  GRAPHICS  APIS  (YET)   !  Local  shared  memory  restricted  to   ‒  Compute     ‒  Tessellation  Control,  in  a  limited  sense   !  Some  OpenCL  extensions  (e.g.,  64  bit  atomics)   !  Numerical  compliance   !  Some  OpenCL  1.2  additions   !  OpenCL  2.0  additions   59   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  60. 60. SUMMARY   The  graphics  pipeline     gives  you  access  to  different  hardware   There  are  additional  synchronization   issues  and  opportunities   Mix  and  match  for  the  best  of  both   compute  and  graphics   60   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  61. 61. DISCLAIMER  &  ATTRIBUTION   The  information  presented  in  this  document  is  for  informational  purposes  only  and  may  contain  technical  inaccuracies,  omissions  and  typographical  errors.     The  information  contained  herein  is  subject  to  change  and  may  be  rendered  inaccurate  for  many  reasons,  including  but  not  limited  to  product  and  roadmap   changes,  component  and  motherboard  version  changes,  new  model  and/or  product  releases,  product  differences  between  differing  manufacturers,  software   changes,  BIOS  flashes,  firmware  upgrades,  or  the  like.  AMD  assumes  no  obligation  to  update  or  otherwise  correct  or  revise  this  information.  However,  AMD   reserves  the  right  to  revise  this  information  and  to  make  changes  from  time  to  time  to  the  content  hereof  without  obligation  of  AMD  to  notify  any  person  of   such  revisions  or  changes.     AMD  MAKES  NO  REPRESENTATIONS  OR  WARRANTIES  WITH  RESPECT  TO  THE  CONTENTS  HEREOF  AND  ASSUMES  NO  RESPONSIBILITY  FOR  ANY   INACCURACIES,  ERRORS  OR  OMISSIONS  THAT  MAY  APPEAR  IN  THIS  INFORMATION.     AMD  SPECIFICALLY  DISCLAIMS  ANY  IMPLIED  WARRANTIES  OF  MERCHANTABILITY  OR  FITNESS  FOR  ANY  PARTICULAR  PURPOSE.  IN  NO  EVENT  WILL  AMD  BE   LIABLE  TO  ANY  PERSON  FOR  ANY  DIRECT,  INDIRECT,  SPECIAL  OR  OTHER  CONSEQUENTIAL  DAMAGES  ARISING  FROM  THE  USE  OF  ANY  INFORMATION   CONTAINED  HEREIN,  EVEN  IF  AMD  IS  EXPRESSLY  ADVISED  OF  THE  POSSIBILITY  OF  SUCH  DAMAGES.     ATTRIBUTION   ©  2013  Advanced  Micro  Devices,  Inc.  All  rights  reserved.  AMD,  the  AMD  Arrow  logo  and  combinations  thereof  are  trademarks  of  Advanced  Micro  Devices,   Inc.  in  the  United  States  and/or  other  jurisdictions.    SPEC    is  a  registered  trademark  of  the  Standard  Performance  Evaluation  Corporation  (SPEC).  
Other   names  are  for  informational  purposes  only  and  may  be  trademarks  of  their  respective  owners.   61   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  1. A particular slide catching your eye?

    Clipping is a handy way to collect important slides you want to go back to later.

×