SlideShare a Scribd company logo
USING	
  OPENGL	
  AND	
  DIRECTX	
  FOR	
  
HETEROGENEOUS	
  COMPUTE	
  
KARL	
  HILLESLAND	
  
AGENDA	
  

THE	
  GRAPHICS	
  PIPELINE	
  

PROGRAMMING	
  THE	
  GPU	
  

FEEDING	
  THE	
  GPU	
  

2	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  
The	
  Graphics	
  
Pipeline	
  
GRAPHICS	
  PIPELINE	
  
SHADER	
  CENTRIC	
  

OpenGL	
  

DirectX	
  

	
  
Vertex	
  Shader	
  

	
  

Vertex	
  Shader

	
  

Tessellation	
  Control	
  Shader

	
  
Tessellation	
  Evaluation	
  Shader	
  
Geometry	
  Shader	
  
Rasterizer	
  
Fragment	
  Shader	
  
Per-Fragment	
  Operations	
  

Tessellation	
  Primitive	
  Generator

4	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

	
  

Input	
  Assembler

Vertex	
  Puller

	
  
Tessellator	
  

Hull	
  Shader

	
  

Domain	
  Shader

	
  

Geometry	
  Shader

	
  

Rasterizer

	
  

Pixel	
  Shader

	
  

Output	
  Merger
GRAPHICS	
  PIPELINE	
  
MORE	
  DETAILS	
  

indices,	
  
vertices	
  

	
  

Input	
  Assembler

Thread	
  per	
  DS	
  vertex	
  (n3)	
  
Barycentric	
  
Domain	
  Shader

	
  

DS	
  vertex	
  
Collects	
  prims

	
  

vertex	
  

	
  

	
  

Tessellator

Patch	
  verts	
  n2	
  

	
  

	
  

Prim	
  verts	
   Geometry	
  Shader

5	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

Prims	
  

Collects	
  Patches
Patch	
  verts	
  n1	
  

Tess	
  
factors	
  

Collects	
  patches

	
  

vertex	
  

Vertex	
  Shader
Thread	
  
per	
  vertex	
  

	
  

Patch	
  Constant

	
  

Hull	
  Shader

Control	
  point	
  

	
  

Primitive	
  Assembler

Thread	
  per	
  output	
  
control	
  point	
  n2	
  

	
  

Next	
  Slide
prim	
  

Hi-Z/Stencil	
  info	
  

	
  

	
  

Rasterizer	
  2

	
  

Unroller

Rasterizer	
  1

Hi-Z/Stencil

Unrolling,	
  
Masking

	
  

Pixel	
  Shader

	
  

	
  

Reordering

Depth/Stencil

	
  

	
  

	
  

	
  

Blending

Not	
  shown:	
  Any	
  shader	
  stage	
  can	
  read/write	
  to	
  memory,	
  
including	
  atomics,	
  filtering*,	
  decompression,	
  and	
  sRGB	
  
conversion	
  

	
  

Collects	
  Quads

Conversion

6	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

Early-Z/Stencil

	
  
WHAT’S	
  THE	
  POINT?	
  
!  The	
  Graphics	
  pipeline	
  has	
  a	
  lot	
  more	
  parts	
  
‒  Reorganizes	
  threads	
  
‒  Tracks	
  dependencies	
  
‒  Reorders	
  
‒  Extra	
  fixed-function	
  units	
  

!  Are	
  they	
  usable?	
  

7	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  
GRAPHICS	
  IN	
  THE	
  NINETIES	
  	
  

	
  

Input	
  Assembler

	
  

Transform	
  and	
  Lighting

	
  

Rasterizer

	
  

Texturing	
  and	
  Fog

	
  

Output	
  Merger

8	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  
VORONOI	
  DIAGRAMS	
  
GPGPU	
  WITHOUT	
  SHADERS	
  

!  Color	
  according	
  to	
  closest	
  
‒  Point	
  
‒  Line	
  

!  Could	
  be	
  weighted	
  
!  Useful	
  for	
  	
  
‒  Collision	
  Detection	
  
‒  Surface	
  Reconstruction	
  
‒  Robot	
  Motion	
  Planning	
  
‒  Non-Photorealistic	
  Rendering	
  
‒  Surface	
  Simplification	
  
‒  Mesh	
  Generation	
  

9	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  
VORONOI	
  DIAGRAMS	
  IN	
  THE	
  NINETIES	
  

Simply	
  rasterize	
  the	
  
cones	
  using	
  graphics	
  
hardware	
  

Haeberli90,	
  Woo97	
  
10	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

2-part	
  discrete	
  Voronoi	
  
diagram	
  representation	
  
Color	
  Buffer	
  

Site	
  IDs	
  

Depth	
  Buffer	
  

Distance	
  
OPENGL	
  1	
  SIMD	
  MACHINE	
  
PEERCY,	
  ET	
  AL.	
  SIGGRAPH	
  2000	
  

SIMD	
  Concept	
  

OpenGL	
  1	
  SIMD	
  

Instruction	
  

OpenGL	
  call	
  (CPU)	
  

SIMD	
  Lane	
  

Pixel	
  

SIMD	
  Lane	
  Input	
  Data	
  

Texel	
  

SIMD	
  Lane	
  Output	
  Data	
  

Fragment	
  

ALU	
  

Blend	
  Operation	
  

Conditionals	
  

Alpha	
  and	
  Stencil	
  Tests	
  

11	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

// Global operand read by contrived_example(); it is never assigned in this snippet.
float y;

// Contrived illustration of a data-dependent branch: g(u,v) is evaluated
// only on the path where x*y is positive, then the result is scaled by h(u,v).
// f, g, h, u and v are not defined in this snippet.
float4 contrived_example()
{
    float x = f(u, v);
    if (x * y > 0)
    {
        x = x + g(u, v);
    }
    return x * h(u, v);
}
USING	
  EARLY-Z	
  OR	
  STENCIL	
  

Texture-space	
  blur	
  

With	
  back-face	
  culling	
  

Applications	
  of	
  Explicit	
  Early-Z	
  Culling,	
  Real-Time	
  Shading	
  Course,	
  Siggraph	
  2004.	
  

12	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

Pressure	
  buffer	
  
used	
  for	
  sim	
  culling	
  
What’s	
  the	
  Point?	
  

The	
  graphics	
  pipeline	
  	
  
gives	
  you	
  access	
  to	
  more	
  

13	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  
Programming	
  
the	
  GPU	
  
SHADER	
  TYPES	
  
!  Compute	
  (4.3)	
  
!  Vertex	
  (2,	
  ES	
  2)	
  
!  Tessellation	
  Control	
  (4)	
  
!  Tessellation	
  Evaluation	
  (4)	
  	
  
!  Geometry	
  (3)	
  
!  Fragment	
  (2,	
  ES	
  2)	
  

OpenGL	
  
15	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

! 
! 
! 
! 
! 
! 

Compute	
  (11)	
  
Vertex	
  (8)	
  
Hull	
  (11)	
  
Domain	
  (11)	
  
Geometry	
  (10)	
  
Pixel	
  (9)	
  

D3D	
  
15	
  
BASIC	
  GLSL	
  VERTEX	
  SHADER	
  
#version 430
// Basic vertex shader: writes the mMVP-transformed position to gl_Position and
// forwards the mM-transformed position and the texture coordinate to the next
// stage through the PosUV interface block.
in vec3 Position;
in vec2 UV;
out PosUV //Not available in GLES
{
    vec3 vPositionWS; // presumably world space, per the WS suffix
    vec2 vUV;
} vs_output;
uniform mat4x4 mMVP;
uniform mat4x4 mM;
void main(void)
{
    vec4 position = vec4(Position, 1.0);
    gl_Position = mMVP * position;
    // mat4 * vec4 yields a vec4 and GLSL has no implicit vec4 -> vec3
    // conversion, so the xyz part is selected explicitly.
    vs_output.vPositionWS = (mM * position).xyz;
    vs_output.vUV = UV;
}
16	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

16	
  
BASIC	
  GLSL	
  PIXEL	
  SHADER	
  
#version 430
// Basic fragment shader: samples sDiffuse at the incoming texture coordinate.
// The block is named PosUV to match the vertex shader's `out PosUV` block:
// GLSL matches interface blocks between stages by block name, while the
// instance name (fs_input) is local to this shader.
in PosUV //Not available in GLES
{
    vec3 vPositionWS;
    vec2 vUV;
} fs_input;
uniform sampler2D sDiffuse;
out vec4 color_out;
void main(void)
{
    color_out = texture( sDiffuse, fs_input.vUV );
}
17	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

17	
  
BASIC	
  HLSL	
  VERTEX	
  SHADER	
  
// Output of the basic HLSL vertex shader.
struct PosUV
{
    float4 vPositionSS : SV_POSITION;
    float3 vPositionWS : POSITION;
    float2 vUV : TEXCOORD0;
};
float4x4 mMVP;
float4x4 mM;
// Transforms the vertex by mMVP for SV_POSITION and by mM for vPositionWS,
// and passes the texture coordinate through unchanged.
PosUV main(
    float3 Position : POSITION,
    float2 UV : TEXCOORD0)
{
    PosUV vs_output;
    const float4 position = float4(Position, 1.0);
    // HLSL's '*' is component-wise; a matrix-vector product needs mul().
    // mul(M, v) treats v as a column vector, like GLSL's M * v.
    vs_output.vPositionSS = mul(mMVP, position);
    // Select xyz explicitly instead of relying on implicit truncation.
    vs_output.vPositionWS = mul(mM, position).xyz;
    vs_output.vUV = UV;
    return vs_output;
}

18	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

18	
  
BASIC	
  HLSL	
  PIXEL	
  SHADER	
  
// Inputs consumed by the basic HLSL pixel shader.
struct fsInput
{
    float3 vPositionWS : POSITION;
    float2 vUV         : TEXCOORD0;
};
sampler sWrapTriLin;
texture2D <float4> tDiffuse;
// Returns tDiffuse sampled through sWrapTriLin at the incoming UV.
float4 main(fsInput i) : SV_TARGET
{
    const float2 uv = i.vUV;
    const float4 diffuse = tDiffuse.Sample(sWrapTriLin, uv);
    return diffuse;
}
19	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

19	
  
BASIC	
  GEOMETRY	
  SHADER	
  
#version 430
// Pass-through geometry shader: re-emits the position of each vertex of the
// input triangle as one triangle strip. The #version line is required because
// layout qualifiers and gl_in are not available in the default GLSL 1.10.
layout (triangles) in;
layout (triangle_strip, max_vertices = 3) out;
void main(void)
{
    for (int i = 0; i < gl_in.length(); i++)
    {
        gl_Position = gl_in[i].gl_Position;
        EmitVertex();
    }
    EndPrimitive();
}
20	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

20	
  
TESSELLATION	
  
Tessellation	
  Control	
  

Hull	
  Shader	
  

Patch	
  Constant	
  Func	
  

Tess	
  factors	
  
Tess	
  factors	
  
Tessellator	
  

Tessellator	
  

Topology	
  

Topology	
  
Tessellation	
  
Evaluation	
  

OpenGL	
  4.0	
  
21	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

Domain	
  
Shader	
  

D3D11	
  
21	
  
TESSELLATION	
  
// Tessellation Control
layout (vertices = 4) out;
void TCS(void)
{
if (gl_InvocationID == 0)
{
gl_TessLevelInner[0] = 2.0;
…

// Hull Shader
// Attributes: 4 output control points, ConstantsHS as the patch-constant
// function, quad domain, integer partitioning, clockwise triangle output.
[outputcontrolpoints(4)]
[patchconstantfunc("ConstantsHS")]
[domain("quad")]
[partitioning("integer")]
[outputtopology("triangle_cw")]

// Tessellation Evaluation
// Input layout: quad domain, clockwise winding, equal spacing.
layout (quads, cw, equal_spacing) in;
void TES(void)
{
…

HS_OUTPUT HullShader(…)
// Domain Shader
DS_OUTPUT DomainShader(…)

OpenGL	
  4.0	
  
22	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

D3D11	
  
22	
  
TESSELLATION	
  CONTROL	
  
out patch float tessFactor;
void main(void)
{
if (gl_InvocationID == 0)

Tessellation	
  rate	
  can	
  be	
  set	
  by	
  any	
  
instance	
  

{
gl_TessLevelInner[0] = 2.0;
…
tessFactor = 2.0;
}

Values	
  can	
  be	
  
communicated	
  across	
  
threads	
  

barrier();
DoSomeWork(tessFactor, gl_InvocationID);

23	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

23	
  
COMPUTE	
  SHADERS	
  
Thread Group

Thread

Thread

group size y

Thread

global size y

global size x

Thread

group size x

!  Groups	
  can	
  share	
  local	
  memory	
  
!  Threads	
  can	
  be	
  synced	
  at	
  a	
  group	
  level	
  

24	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

24	
  
OPENGL	
  COMPUTE	
  
#version 430
// The first invocation of each work group (local x == 0) stores the group's
// x index in a shared variable; after the barrier every invocation writes
// that value to linearOutput at its global x index.
buffer BlockName { int linearOutput[]; };
shared int var;
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
void ContrivedSample()
{
    const uvec3 localIdx = gl_LocalInvocationID;
    const uvec3 globalIdx = gl_GlobalInvocationID;
    const uvec3 groupIdx = gl_WorkGroupID;
    if (localIdx.x == 0)
    {
        // GLSL has no implicit uint -> int conversion, so convert explicitly.
        var = int(groupIdx.x);
    }
    barrier();
    linearOutput[globalIdx.x] = var;
}
// A GLSL shader's entry point must be named main.
void main()
{
    ContrivedSample();
}
25	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

25	
  
DIRECT	
  COMPUTE	
  
RWStructuredBuffer<int> linearOutput;
groupshared int var;
// The thread with local x == 0 stores the group's x index in groupshared
// memory; after the group-wide sync every thread writes that value to
// linearOutput at its dispatch-thread x index.
[numthreads(64, 1, 1)]
void ContrivedSample(
    uint3 globalIdx : SV_DispatchThreadID,
    uint3 localIdx : SV_GroupThreadID,
    uint3 groupIdx : SV_GroupID )
{
    const bool isFirstInGroup = (localIdx.x == 0);
    if (isFirstInGroup)
    {
        var = groupIdx.x;
    }
    GroupMemoryBarrierWithGroupSync();
    const uint outIndex = globalIdx.x;
    linearOutput[outIndex] = var;
}

26	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

26	
  
PROGRAMMING	
  THE	
  GPU	
  
SYNCHRONIZATION	
  
MEMORY	
  COHERENCE -	
  GL	
  /	
  DX	
  
Dispatch	
  

CS	
  

28	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

Mem	
  

CS	
  

28	
  
MEMORY	
  COHERENCE -	
  GL/DX	
  11.1	
  
Draw	
  

VS	
  

Mem	
  

GS	
  

VS	
  
GS	
  

FS	
  
FS	
  

RT	
  

29	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

29	
  
MEMORY	
  COHERENCE -	
  GL	
  /	
  DX	
  11.1	
  

Draw	
  
VS	
  

Mem	
  

GS	
  

FS	
  
RT	
  

30	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

30	
  
Feeding	
  the	
  
GPU	
  
DRIVER	
  STACKS	
  (WINDOWS)	
  

	
  OpenGL	
  App	
  

DirectX	
  App	
  

OpenGL32.dll	
  

D3D11.dll	
  

D3D	
  UMD	
  

OpenGL	
  ICD	
  

DXGI	
  
KMD	
  

32	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

32	
  
DRIVER	
  STACKS	
  (LINUX)	
  

App	
  
libGL	
  

Gallium3D	
  
State	
  tracker	
  
DRI	
  

Or	
  

Hardware	
  layer	
  
Gallium3D	
  
WinSys	
  

libDRM-radeon	
  

drm	
  

33	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

33	
  
FEEDING	
  THE	
  GPU	
  
GPU-CPU	
  SYNCHRONIZATION	
  
DRIVER	
  COMMAND	
  QUEUE	
  
Application	
  
Dr	
  
5	
  

Da	
  
5	
  

Da	
  1	
  

Dr	
  1	
  

Da	
  	
  
6	
  

Da	
  2	
  

Dr	
  
6	
  

Dr	
  2	
  

Da	
  3	
  

Dr	
  3	
  

Da	
  4	
  

Dr	
  4	
  

Da	
  5	
  

Dr	
  5	
  

Da	
  6	
  

Dr	
  6	
  

Driver/GPU	
  
Time	
  
Reorder	
  possible?	
  

35	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

35	
  
CPU/GPU	
  MEMORY	
  SYNCHRONIZATION	
  
BY	
  DRIVER	
  

App	
  
Memory	
  

Driver	
  
Copy	
  

App	
  
Memory	
  

Driver	
  
Copy	
  

Hints	
  

36	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

GPU	
  
Read	
  
Driver	
  
Copy	
  

GPU	
  
Read	
  

Stream,	
  Static,	
  Dynamic	
  
Draw,	
  Read,	
  Copy	
  
CPU/GPU	
  MEMORY	
  SYNCHRONIZATION	
  
MANUAL	
  

App	
  
Memory	
  

Da	
  1	
  

Dr	
  1	
  

Driver	
  
Copy	
  

App	
  Copy	
  

Da	
  2	
  

Dr	
  2	
  

Fence	
  
37	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

Da	
  3	
  

Dr	
  3	
  

Da	
  4	
  

Dr	
  4	
  

Da	
  5	
  

GPU	
  
Read	
  

Dr	
  5	
  

Da	
  6	
  

Dr	
  6	
  
FEEDING	
  THE	
  GPU	
  
DATA	
  
LEGACY	
  OPENGL	
  OBJECT	
  MODEL	
  
!  glGenBuffers,	
  glGenTextures,	
  glGenSamplers,	
  …	
  
‒  Creates	
  name	
  /	
  handle	
  

!  glBindBuffer,	
  glBindTexture,	
  	
  
‒  Sets	
  as	
  current	
  

!  glBufferData,	
  glTexSubImage,	
  glMapBuffer	
  
‒  Supplies	
  data	
  

39	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

39	
  
BUFFER	
  BINDING	
  AND	
  CREATION	
  

glBindBuffer(target,name)	
  

Target	
  

binding	
  

BufferObject	
  

State,	
  Usage	
  

BufferData	
  

desc.BindFlags	
  =	
  <Target>	
  
pDevice->CreateBuffer(desc,…)	
  

40	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

40	
  
SETTING	
  DATA	
  (SIMPLEST	
  OPTION)	
  

glBufferData	
  
(target,	
  size,	
  pData,	
  usage)	
  

data	
  

Target	
  

binding	
  

BufferObject	
  

desc.Usage	
  =	
  <Usage>	
  
desc.CPUAccessFlags	
  =	
  <RWUsage>	
  
pDevice->CreateBuffer(desc,pData,…)	
  
41	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

41	
  
BUFFER	
  TARGETS	
  
GL	
  Name	
  

Typical	
  Purpose	
  	
  

DX	
  Equivalent	
  

ARRAY	
  

Vertices	
  

VERTEX	
  

ELEMENT_ARRAY	
  

Indices	
  

INDEX	
  

UNIFORM	
  

Read-only	
  vars	
  

CONSTANT	
  

TEXTURE_BUFFER	
  

Buffer-as-texture	
  

CONSTANT	
  (tbuffer)	
  

SHADER_STORAGE	
  

Read/write	
  

SHADER_RESOURCE	
  

TRANSFORM_FEEDBACK	
  

Stream	
  out	
  

Stream	
  out	
  

DRAW_INDIRECT	
  

indirect	
  draw	
  

DRAWINDIRECT	
  

ATOMIC_COUNTER	
  

Global	
  counter	
  var	
  

UAV_FLAG_COUNTER	
  

COPY_READ,	
  _WRITE	
  

Copying	
  (optional)	
  

Staging?	
  

PIXEL_PACK,	
  _UNPACK	
  

GPU	
  <->	
  CPU	
  

Staging?	
  

42	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

42	
  
DIRECTX	
  OBJECTS	
  AND	
  VIEWS	
  
!  Resource	
  (base	
  class)	
  
‒  Usage:	
  default,	
  immutable,	
  dynamic,	
  staging	
  
‒  Bind	
  flags:	
  vertex,	
  index,	
  shader	
  resource,	
  …	
  

!  Buffer	
  
!  Texture2D,	
  …	
  
!  DepthStencilView	
  
!  RenderTargetView	
  
!  ShaderResourceView	
  
!  UnorderedAccessView	
  

43	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

43	
  
OBJECT	
  AND	
  VIEW	
  EXAMPLE	
  
D3D11_BUFFER_DESC desc;
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
…
pDevice->CreateBuffer(&desc, data, &pBuffer);
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc;
srcDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
…
pDevice->CreateShaderResourceView(pBuffer, &srvDesc, &pView);
//at draw time
pContext->VSSetShaderResources(0, 1, pView);
44	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

44	
  
DATA	
  TYPES	
  
Image	
  

45	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

Linear	
  
IMMUTABLE	
  TEXTURES	
  (4.2,	
  GLES	
  3)	
  
/* Allocate immutable storage for a 2D array texture, then upload its texels. */
glGenTextures(1, &texObjName);
glBindTexture(GL_TEXTURE_2D_ARRAY, texObjName);
/* NOTE: the second argument of glTexStorage3D is the number of mip levels. */
glTexStorage3D(GL_TEXTURE_2D_ARRAY, level, internalformat,
               width, height, depth);
/* glTexSubImage3D takes (target, level, xoffset, yoffset, zoffset, ...):
 * mip level 0 followed by a zero offset in x, y and z. */
glTexSubImage3D(GL_TEXTURE_2D_ARRAY,
                0, 0, 0, 0, width, height, depth,
                format, type, pData);
	
  

CreateTexture2D( desc, srcDataLayout, pData);

46	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

46	
  
FEEDING	
  THE	
  GPU	
  
PROGRAMS	
  
SHADER	
  MANAGEMENT	
  -	
  OPENGL	
  
Program	
  Object	
  
GLuint shader = glCreateShader(GL_VERTEX_SHADER);

Vertex	
  Shader	
  

/* Supply the source (arguments elided on the slide), then compile the shader
 * object created above; glCompileShader takes the shader object name. */
glShaderSource(…);
glCompileShader(shader);

Pixel	
  Shader	
  

/* Create a program object, attach the shader to it, link it, and make it
 * the current program. */
GLuint program = glCreateProgram();
glAttachShader(program, shader);
glLinkProgram(program);
glUseProgram(program);

	
  

	
  
	
  

48	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

48	
  
BASIC	
  GLSL	
  PIXEL	
  SHADER	
  
#version 430
// Basic fragment shader: samples sDiffuse at the incoming texture coordinate.
// The block is named PosUV to match the vertex shader's `out PosUV` block:
// GLSL matches interface blocks between stages by block name, while the
// instance name (fs_input) is local to this shader.
in PosUV //Not available in GLES
{
    vec3 vPositionWS;
    vec2 vUV;
} fs_input;
uniform sampler2D sDiffuse;
out vec4 color_out;
void main(void)
{
    color_out = texture( sDiffuse, fs_input.vUV );
}
49	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

49	
  
BASIC	
  GLSL	
  VERTEX	
  SHADER	
  
#version 430
// Basic vertex shader: writes the mMVP-transformed position to gl_Position and
// forwards the mM-transformed position and the texture coordinate to the next
// stage through the PosUV interface block.
in vec3 Position;
in vec2 UV;
out PosUV //Not available in GLES
{
    vec3 vPositionWS; // presumably world space, per the WS suffix
    vec2 vUV;
} vs_output;
uniform mat4x4 mMVP;
uniform mat4x4 mM;
void main(void)
{
    vec4 position = vec4(Position, 1.0);
    gl_Position = mMVP * position;
    // mat4 * vec4 yields a vec4 and GLSL has no implicit vec4 -> vec3
    // conversion, so the xyz part is selected explicitly.
    vs_output.vPositionWS = (mM * position).xyz;
    vs_output.vUV = UV;
}
50	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

50	
  
SHADER	
  MANAGEMENT	
  -	
  DX	
  
D3DCompile(source,..,vs_5_0,..,&pByteCode)
pShader = CreateVertexShader(pByteCode);
VSSetShader(pShader,0,0);

!  No	
  program	
  /	
  link	
  concept	
  in	
  API	
  

51	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

51	
  
PROGRAM	
  BINARIES	
  
OpenGL	
  
glGetProgramBinary(program,…,format,pBinaryOut);

DirectX	
  
D3DCompile(source,..,vs_5_0,..,&pByteCode)

	
  
!  Program	
  level	
  

!  Shader	
  level	
  

!  In	
  theory:	
  format	
  choices	
  

!  Portable	
  byte	
  code	
  

!  In	
  practice:	
  somewhat	
  final,	
  non-portable	
  

52	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

52	
  
DRAW	
  CALLS	
  
OpenGL	
  

D3D	
  

glDrawArrays	
  

Draw	
  

glDrawArraysInstanced	
  

DrawInstanced(…,0)	
  

glDrawArraysInstancedBaseInstance	
  

DrawInstanced	
  

glDrawArraysIndirect	
  

DrawInstancedIndirect	
  

glMultiDrawArrays	
  

for(int	
  i=0;	
  i<n;	
  ++i)	
  
	
  	
  	
  Draw(count[i],	
  start[i]);	
  

glMultiDrawArraysIndirect	
  

for(int	
  i=0;	
  i<n;	
  ++i)	
  
	
  	
  	
  DrawInstancedIndirect(…)	
  

glDrawElements	
  

DrawIndexed	
  

…And	
  so	
  forth	
  

53	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

53	
  
COMPUTE	
  SHADERS	
  
glDispatchCompute(nGroupsX,nGroupsY,nGroupsZ)
	
  

Dispatch(nGroupsX,nGroupsY,nGroupsZ
)

	
  

glDispatchComputeIndirect(offset)

	
  

DispatchIndirect(pResource,offset)

	
  
OpenGL	
  4.3	
  

54	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

D3D11	
  

54	
  
Wrap	
  up	
  
IMAGE-BASED	
  MODELING	
  

56	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  
GENERATING	
  THE	
  MODEL	
  
Render:	
  projection,	
  
rasterization,	
  
texturing,	
  depth	
  
buffering,	
  …	
  

57	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  
TressFX	
  
HAIR	
  

!  AMD	
  technology	
  for	
  high-quality	
  hair	
  rendering	
  
!  Thousands	
  of	
  hair	
  strands	
  individually	
  simulated	
  and	
  
rendered	
  on	
  the	
  GPU	
  
!  DirectCompute	
  physics	
  simulation	
  
!  Shader	
  Model	
  5.0	
  pixel	
  shader	
  using	
  compute	
  capabilities	
  for	
  
rendering	
  

58	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  
NOT	
  EXPOSED	
  IN	
  GRAPHICS	
  APIS	
  (YET)	
  
!  Local	
  shared	
  memory	
  restricted	
  to	
  
‒  Compute	
  	
  
‒  Tessellation	
  Control,	
  in	
  a	
  limited	
  sense	
  

!  Some	
  OpenCL	
  extensions	
  (e.g.,	
  64	
  bit	
  atomics)	
  
!  Numerical	
  compliance	
  
!  Some	
  OpenCL	
  1.2	
  additions	
  
!  OpenCL	
  2.0	
  additions	
  

59	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  
SUMMARY	
  

The	
  graphics	
  pipeline	
  	
  
gives	
  you	
  access	
  to	
  different	
  hardware	
  

There	
  are	
  additional	
  synchronization	
  
issues	
  and	
  opportunities	
  

Mix	
  and	
  match	
  for	
  the	
  best	
  of	
  both	
  
compute	
  and	
  graphics	
  
60	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  
DISCLAIMER	
  &	
  ATTRIBUTION	
  
The information presented in this document is for informational purposes only and may contain technical inaccuracies, omissions and typographical errors.
  
	
  
The information contained herein is subject to change and may be rendered inaccurate for many reasons, including but not limited to product and roadmap changes, component and motherboard version changes, new model and/or product releases, product differences between differing manufacturers, software changes, BIOS flashes, firmware upgrades, or the like. AMD assumes no obligation to update or otherwise correct or revise this information. However, AMD reserves the right to revise this information and to make changes from time to time to the content hereof without obligation of AMD to notify any person of such revisions or changes.
  
	
  
AMD	
  MAKES	
  NO	
  REPRESENTATIONS	
  OR	
  WARRANTIES	
  WITH	
  RESPECT	
  TO	
  THE	
  CONTENTS	
  HEREOF	
  AND	
  ASSUMES	
  NO	
  RESPONSIBILITY	
  FOR	
  ANY	
  
INACCURACIES,	
  ERRORS	
  OR	
  OMISSIONS	
  THAT	
  MAY	
  APPEAR	
  IN	
  THIS	
  INFORMATION.	
  
	
  
AMD	
  SPECIFICALLY	
  DISCLAIMS	
  ANY	
  IMPLIED	
  WARRANTIES	
  OF	
  MERCHANTABILITY	
  OR	
  FITNESS	
  FOR	
  ANY	
  PARTICULAR	
  PURPOSE.	
  IN	
  NO	
  EVENT	
  WILL	
  AMD	
  BE	
  
LIABLE	
  TO	
  ANY	
  PERSON	
  FOR	
  ANY	
  DIRECT,	
  INDIRECT,	
  SPECIAL	
  OR	
  OTHER	
  CONSEQUENTIAL	
  DAMAGES	
  ARISING	
  FROM	
  THE	
  USE	
  OF	
  ANY	
  INFORMATION	
  
CONTAINED	
  HEREIN,	
  EVEN	
  IF	
  AMD	
  IS	
  EXPRESSLY	
  ADVISED	
  OF	
  THE	
  POSSIBILITY	
  OF	
  SUCH	
  DAMAGES.	
  
	
  
ATTRIBUTION	
  
© 2013 Advanced Micro Devices, Inc. All rights reserved. AMD, the AMD Arrow logo and combinations thereof are trademarks of Advanced Micro Devices, Inc. in the United States and/or other jurisdictions. SPEC is a registered trademark of the Standard Performance Evaluation Corporation (SPEC). Other names are for informational purposes only and may be trademarks of their respective owners.
  

61	
   |	
  	
  	
  PRESENTATION	
  TITLE	
  	
  	
  |	
  	
  	
  DECEMBER	
  4,	
  2013	
  	
  	
  |	
  	
  	
  CONFIDENTIAL	
  

More Related Content

What's hot

PG-4036, Computational Fluid Dynamics of the blood flow in the cloud, by Jaku...
PG-4036, Computational Fluid Dynamics of the blood flow in the cloud, by Jaku...PG-4036, Computational Fluid Dynamics of the blood flow in the cloud, by Jaku...
PG-4036, Computational Fluid Dynamics of the blood flow in the cloud, by Jaku...
AMD Developer Central
 
PT-4054, "OpenCL™ Accelerated Compute Libraries" by John Melonakos
PT-4054, "OpenCL™ Accelerated Compute Libraries" by John MelonakosPT-4054, "OpenCL™ Accelerated Compute Libraries" by John Melonakos
PT-4054, "OpenCL™ Accelerated Compute Libraries" by John Melonakos
AMD Developer Central
 
GS-4150, Bullet 3 OpenCL Rigid Body Simulation, by Erwin Coumans
GS-4150, Bullet 3 OpenCL Rigid Body Simulation, by Erwin CoumansGS-4150, Bullet 3 OpenCL Rigid Body Simulation, by Erwin Coumans
GS-4150, Bullet 3 OpenCL Rigid Body Simulation, by Erwin Coumans
AMD Developer Central
 
PT-4102, Simulation, Compilation and Debugging of OpenCL on the AMD Southern ...
PT-4102, Simulation, Compilation and Debugging of OpenCL on the AMD Southern ...PT-4102, Simulation, Compilation and Debugging of OpenCL on the AMD Southern ...
PT-4102, Simulation, Compilation and Debugging of OpenCL on the AMD Southern ...
AMD Developer Central
 
WT-4064, Build Rich Applications with HTML5 and WebGL, by Tony Parisi
WT-4064, Build Rich Applications with HTML5 and WebGL, by Tony ParisiWT-4064, Build Rich Applications with HTML5 and WebGL, by Tony Parisi
WT-4064, Build Rich Applications with HTML5 and WebGL, by Tony Parisi
AMD Developer Central
 
WT-4072, Rendering Web Content at 60fps, by Vangelis Kokkevis, Antoine Labour...
WT-4072, Rendering Web Content at 60fps, by Vangelis Kokkevis, Antoine Labour...WT-4072, Rendering Web Content at 60fps, by Vangelis Kokkevis, Antoine Labour...
WT-4072, Rendering Web Content at 60fps, by Vangelis Kokkevis, Antoine Labour...
AMD Developer Central
 
WT-4073, ANGLE and cross-platform WebGL support, by Shannon Woods
WT-4073, ANGLE and cross-platform WebGL support, by Shannon WoodsWT-4073, ANGLE and cross-platform WebGL support, by Shannon Woods
WT-4073, ANGLE and cross-platform WebGL support, by Shannon Woods
AMD Developer Central
 
PL-4042, Wholly Graal: Accelerating GPU offload for Java/Sumatra using the Op...
PL-4042, Wholly Graal: Accelerating GPU offload for Java/Sumatra using the Op...PL-4042, Wholly Graal: Accelerating GPU offload for Java/Sumatra using the Op...
PL-4042, Wholly Graal: Accelerating GPU offload for Java/Sumatra using the Op...
AMD Developer Central
 
WT-4069, WebCL: Enabling OpenCL Acceleration of Web Applications, by Mikael ...
WT-4069, WebCL: Enabling OpenCL Acceleration of Web Applications, by  Mikael ...WT-4069, WebCL: Enabling OpenCL Acceleration of Web Applications, by  Mikael ...
WT-4069, WebCL: Enabling OpenCL Acceleration of Web Applications, by Mikael ...
AMD Developer Central
 
Direct3D12 and the Future of Graphics APIs by Dave Oldcorn
Direct3D12 and the Future of Graphics APIs by Dave OldcornDirect3D12 and the Future of Graphics APIs by Dave Oldcorn
Direct3D12 and the Future of Graphics APIs by Dave Oldcorn
AMD Developer Central
 
Keynote (Mike Muller) - Is There Anything New in Heterogeneous Computing - by...
Keynote (Mike Muller) - Is There Anything New in Heterogeneous Computing - by...Keynote (Mike Muller) - Is There Anything New in Heterogeneous Computing - by...
Keynote (Mike Muller) - Is There Anything New in Heterogeneous Computing - by...
AMD Developer Central
 
GS-4152, AMD’s Radeon R9-290X, One Big dGPU, by Michael Mantor
GS-4152, AMD’s Radeon R9-290X, One Big dGPU, by Michael MantorGS-4152, AMD’s Radeon R9-290X, One Big dGPU, by Michael Mantor
GS-4152, AMD’s Radeon R9-290X, One Big dGPU, by Michael Mantor
AMD Developer Central
 
WT-4065, Superconductor: GPU Web Programming for Big Data Visualization, by ...
WT-4065, Superconductor: GPU Web Programming for Big Data Visualization, by  ...WT-4065, Superconductor: GPU Web Programming for Big Data Visualization, by  ...
WT-4065, Superconductor: GPU Web Programming for Big Data Visualization, by ...
AMD Developer Central
 
PG-4037, Fast modal analysis with NX Nastran and GPUs, by Leonard Hoffnung
PG-4037, Fast modal analysis with NX Nastran and GPUs, by Leonard HoffnungPG-4037, Fast modal analysis with NX Nastran and GPUs, by Leonard Hoffnung
PG-4037, Fast modal analysis with NX Nastran and GPUs, by Leonard Hoffnung
AMD Developer Central
 
PL-4051, An Introduction to SPIR for OpenCL Application Developers and Compil...
PL-4051, An Introduction to SPIR for OpenCL Application Developers and Compil...PL-4051, An Introduction to SPIR for OpenCL Application Developers and Compil...
PL-4051, An Introduction to SPIR for OpenCL Application Developers and Compil...
AMD Developer Central
 
WT-4151, Efficient Delivery of 3D Web Contents with Khronos and MPEG Technolo...
WT-4151, Efficient Delivery of 3D Web Contents with Khronos and MPEG Technolo...WT-4151, Efficient Delivery of 3D Web Contents with Khronos and MPEG Technolo...
WT-4151, Efficient Delivery of 3D Web Contents with Khronos and MPEG Technolo...
AMD Developer Central
 
vkFX: Effect(ive) approach for Vulkan API
vkFX: Effect(ive) approach for Vulkan APIvkFX: Effect(ive) approach for Vulkan API
vkFX: Effect(ive) approach for Vulkan API
Tristan Lorach
 
CC-4010, Bringing Spatial Love to your Java Application, by Steven Citron-Pousty
CC-4010, Bringing Spatial Love to your Java Application, by Steven Citron-PoustyCC-4010, Bringing Spatial Love to your Java Application, by Steven Citron-Pousty
CC-4010, Bringing Spatial Love to your Java Application, by Steven Citron-Pousty
AMD Developer Central
 
PT-4057, Automated CUDA-to-OpenCL™ Translation with CU2CL: What's Next?, by W...
PT-4057, Automated CUDA-to-OpenCL™ Translation with CU2CL: What's Next?, by W...PT-4057, Automated CUDA-to-OpenCL™ Translation with CU2CL: What's Next?, by W...
PT-4057, Automated CUDA-to-OpenCL™ Translation with CU2CL: What's Next?, by W...
AMD Developer Central
 

What's hot (20)

PG-4036, Computational Fluid Dynamics of the blood flow in the cloud, by Jaku...
PG-4036, Computational Fluid Dynamics of the blood flow in the cloud, by Jaku...PG-4036, Computational Fluid Dynamics of the blood flow in the cloud, by Jaku...
PG-4036, Computational Fluid Dynamics of the blood flow in the cloud, by Jaku...
 
PT-4054, "OpenCL™ Accelerated Compute Libraries" by John Melonakos
PT-4054, "OpenCL™ Accelerated Compute Libraries" by John MelonakosPT-4054, "OpenCL™ Accelerated Compute Libraries" by John Melonakos
PT-4054, "OpenCL™ Accelerated Compute Libraries" by John Melonakos
 
GS-4150, Bullet 3 OpenCL Rigid Body Simulation, by Erwin Coumans
GS-4150, Bullet 3 OpenCL Rigid Body Simulation, by Erwin CoumansGS-4150, Bullet 3 OpenCL Rigid Body Simulation, by Erwin Coumans
GS-4150, Bullet 3 OpenCL Rigid Body Simulation, by Erwin Coumans
 
PT-4102, Simulation, Compilation and Debugging of OpenCL on the AMD Southern ...
PT-4102, Simulation, Compilation and Debugging of OpenCL on the AMD Southern ...PT-4102, Simulation, Compilation and Debugging of OpenCL on the AMD Southern ...
PT-4102, Simulation, Compilation and Debugging of OpenCL on the AMD Southern ...
 
WT-4064, Build Rich Applications with HTML5 and WebGL, by Tony Parisi
WT-4064, Build Rich Applications with HTML5 and WebGL, by Tony ParisiWT-4064, Build Rich Applications with HTML5 and WebGL, by Tony Parisi
WT-4064, Build Rich Applications with HTML5 and WebGL, by Tony Parisi
 
WT-4072, Rendering Web Content at 60fps, by Vangelis Kokkevis, Antoine Labour...
WT-4072, Rendering Web Content at 60fps, by Vangelis Kokkevis, Antoine Labour...WT-4072, Rendering Web Content at 60fps, by Vangelis Kokkevis, Antoine Labour...
WT-4072, Rendering Web Content at 60fps, by Vangelis Kokkevis, Antoine Labour...
 
WT-4073, ANGLE and cross-platform WebGL support, by Shannon Woods
WT-4073, ANGLE and cross-platform WebGL support, by Shannon WoodsWT-4073, ANGLE and cross-platform WebGL support, by Shannon Woods
WT-4073, ANGLE and cross-platform WebGL support, by Shannon Woods
 
PL-4042, Wholly Graal: Accelerating GPU offload for Java/Sumatra using the Op...
PL-4042, Wholly Graal: Accelerating GPU offload for Java/Sumatra using the Op...PL-4042, Wholly Graal: Accelerating GPU offload for Java/Sumatra using the Op...
PL-4042, Wholly Graal: Accelerating GPU offload for Java/Sumatra using the Op...
 
Gcn performance ftw by stephan hodes
Gcn performance ftw by stephan hodesGcn performance ftw by stephan hodes
Gcn performance ftw by stephan hodes
 
WT-4069, WebCL: Enabling OpenCL Acceleration of Web Applications, by Mikael ...
WT-4069, WebCL: Enabling OpenCL Acceleration of Web Applications, by  Mikael ...WT-4069, WebCL: Enabling OpenCL Acceleration of Web Applications, by  Mikael ...
WT-4069, WebCL: Enabling OpenCL Acceleration of Web Applications, by Mikael ...
 
Direct3D12 and the Future of Graphics APIs by Dave Oldcorn
Direct3D12 and the Future of Graphics APIs by Dave OldcornDirect3D12 and the Future of Graphics APIs by Dave Oldcorn
Direct3D12 and the Future of Graphics APIs by Dave Oldcorn
 
Keynote (Mike Muller) - Is There Anything New in Heterogeneous Computing - by...
Keynote (Mike Muller) - Is There Anything New in Heterogeneous Computing - by...Keynote (Mike Muller) - Is There Anything New in Heterogeneous Computing - by...
Keynote (Mike Muller) - Is There Anything New in Heterogeneous Computing - by...
 
GS-4152, AMD’s Radeon R9-290X, One Big dGPU, by Michael Mantor
GS-4152, AMD’s Radeon R9-290X, One Big dGPU, by Michael MantorGS-4152, AMD’s Radeon R9-290X, One Big dGPU, by Michael Mantor
GS-4152, AMD’s Radeon R9-290X, One Big dGPU, by Michael Mantor
 
WT-4065, Superconductor: GPU Web Programming for Big Data Visualization, by ...
WT-4065, Superconductor: GPU Web Programming for Big Data Visualization, by  ...WT-4065, Superconductor: GPU Web Programming for Big Data Visualization, by  ...
WT-4065, Superconductor: GPU Web Programming for Big Data Visualization, by ...
 
PG-4037, Fast modal analysis with NX Nastran and GPUs, by Leonard Hoffnung
PG-4037, Fast modal analysis with NX Nastran and GPUs, by Leonard HoffnungPG-4037, Fast modal analysis with NX Nastran and GPUs, by Leonard Hoffnung
PG-4037, Fast modal analysis with NX Nastran and GPUs, by Leonard Hoffnung
 
PL-4051, An Introduction to SPIR for OpenCL Application Developers and Compil...
PL-4051, An Introduction to SPIR for OpenCL Application Developers and Compil...PL-4051, An Introduction to SPIR for OpenCL Application Developers and Compil...
PL-4051, An Introduction to SPIR for OpenCL Application Developers and Compil...
 
WT-4151, Efficient Delivery of 3D Web Contents with Khronos and MPEG Technolo...
WT-4151, Efficient Delivery of 3D Web Contents with Khronos and MPEG Technolo...WT-4151, Efficient Delivery of 3D Web Contents with Khronos and MPEG Technolo...
WT-4151, Efficient Delivery of 3D Web Contents with Khronos and MPEG Technolo...
 
vkFX: Effect(ive) approach for Vulkan API
vkFX: Effect(ive) approach for Vulkan APIvkFX: Effect(ive) approach for Vulkan API
vkFX: Effect(ive) approach for Vulkan API
 
CC-4010, Bringing Spatial Love to your Java Application, by Steven Citron-Pousty
CC-4010, Bringing Spatial Love to your Java Application, by Steven Citron-PoustyCC-4010, Bringing Spatial Love to your Java Application, by Steven Citron-Pousty
CC-4010, Bringing Spatial Love to your Java Application, by Steven Citron-Pousty
 
PT-4057, Automated CUDA-to-OpenCL™ Translation with CU2CL: What's Next?, by W...
PT-4057, Automated CUDA-to-OpenCL™ Translation with CU2CL: What's Next?, by W...PT-4057, Automated CUDA-to-OpenCL™ Translation with CU2CL: What's Next?, by W...
PT-4057, Automated CUDA-to-OpenCL™ Translation with CU2CL: What's Next?, by W...
 

Similar to PG-4034, Using OpenGL and DirectX for Heterogeneous Compute, by Karl Hillesland

Foveated Ray Tracing for VR on Multiple GPUs
Foveated Ray Tracing for VR on Multiple GPUsFoveated Ray Tracing for VR on Multiple GPUs
Foveated Ray Tracing for VR on Multiple GPUs
Takahiro Harada
 
Introduction to Monte Carlo Ray Tracing, OpenCL Implementation (CEDEC 2014)
Introduction to Monte Carlo Ray Tracing, OpenCL Implementation (CEDEC 2014)Introduction to Monte Carlo Ray Tracing, OpenCL Implementation (CEDEC 2014)
Introduction to Monte Carlo Ray Tracing, OpenCL Implementation (CEDEC 2014)
Takahiro Harada
 
Game Programming 12 - Shaders
Game Programming 12 - ShadersGame Programming 12 - Shaders
Game Programming 12 - Shaders
Nick Pruehs
 
GPGPU Programming @DroidconNL 2012 by Alten
GPGPU Programming @DroidconNL 2012 by AltenGPGPU Programming @DroidconNL 2012 by Alten
GPGPU Programming @DroidconNL 2012 by Alten
Arjan Somers
 
Holy smoke! Faster Particle Rendering using Direct Compute by Gareth Thomas
Holy smoke! Faster Particle Rendering using Direct Compute by Gareth ThomasHoly smoke! Faster Particle Rendering using Direct Compute by Gareth Thomas
Holy smoke! Faster Particle Rendering using Direct Compute by Gareth Thomas
AMD Developer Central
 
Shadow Volumes on Programmable Graphics Hardware
Shadow Volumes on Programmable Graphics HardwareShadow Volumes on Programmable Graphics Hardware
Shadow Volumes on Programmable Graphics Hardware
stefan_b
 
How to Use OpenGL/ES on Native Activity
How to Use OpenGL/ES on Native ActivityHow to Use OpenGL/ES on Native Activity
NIR on the Mesa i965 backend (FOSDEM 2016)
NIR on the Mesa i965 backend (FOSDEM 2016)NIR on the Mesa i965 backend (FOSDEM 2016)
NIR on the Mesa i965 backend (FOSDEM 2016)
Igalia
 
Graph x pregel
Graph x pregelGraph x pregel
Graph x pregel
Sigmoid
 
GraphX and Pregel - Apache Spark
GraphX and Pregel - Apache SparkGraphX and Pregel - Apache Spark
GraphX and Pregel - Apache Spark
Ashutosh Trivedi
 
201707 SER332 Lecture 05
201707 SER332 Lecture 05   201707 SER332 Lecture 05
201707 SER332 Lecture 05
Javier Gonzalez-Sanchez
 
Advanced Cartographic Map Rendering In GeoServer
Advanced Cartographic Map Rendering In GeoServerAdvanced Cartographic Map Rendering In GeoServer
Advanced Cartographic Map Rendering In GeoServer
GeoSolutions
 
201707 SER332 Lecture 06
201707 SER332 Lecture 06 201707 SER332 Lecture 06
201707 SER332 Lecture 06
Javier Gonzalez-Sanchez
 
Vertex Shader Tricks by Bill Bilodeau - AMD at GDC14
Vertex Shader Tricks by Bill Bilodeau - AMD at GDC14Vertex Shader Tricks by Bill Bilodeau - AMD at GDC14
Vertex Shader Tricks by Bill Bilodeau - AMD at GDC14
AMD Developer Central
 
201707 SER332 Lecture 07
201707 SER332 Lecture 07   201707 SER332 Lecture 07
201707 SER332 Lecture 07
Javier Gonzalez-Sanchez
 
SRv6 Mobile User Plane P4 proto-type
SRv6 Mobile User Plane P4 proto-typeSRv6 Mobile User Plane P4 proto-type
SRv6 Mobile User Plane P4 proto-type
Kentaro Ebisawa
 
Custom SRP and graphics workflows - Unite Copenhagen 2019
Custom SRP and graphics workflows - Unite Copenhagen 2019Custom SRP and graphics workflows - Unite Copenhagen 2019
Custom SRP and graphics workflows - Unite Copenhagen 2019
Unity Technologies
 
OpenGL 4.5 Update for NVIDIA GPUs
OpenGL 4.5 Update for NVIDIA GPUsOpenGL 4.5 Update for NVIDIA GPUs
OpenGL 4.5 Update for NVIDIA GPUs
Mark Kilgard
 
Beginning direct3d gameprogramming09_shaderprogramming_20160505_jintaeks
Beginning direct3d gameprogramming09_shaderprogramming_20160505_jintaeksBeginning direct3d gameprogramming09_shaderprogramming_20160505_jintaeks
Beginning direct3d gameprogramming09_shaderprogramming_20160505_jintaeks
JinTaek Seo
 
201707 SER332 Lecture 03
201707 SER332 Lecture 03   201707 SER332 Lecture 03
201707 SER332 Lecture 03
Javier Gonzalez-Sanchez
 

Similar to PG-4034, Using OpenGL and DirectX for Heterogeneous Compute, by Karl Hillesland (20)

Foveated Ray Tracing for VR on Multiple GPUs
Foveated Ray Tracing for VR on Multiple GPUsFoveated Ray Tracing for VR on Multiple GPUs
Foveated Ray Tracing for VR on Multiple GPUs
 
Introduction to Monte Carlo Ray Tracing, OpenCL Implementation (CEDEC 2014)
Introduction to Monte Carlo Ray Tracing, OpenCL Implementation (CEDEC 2014)Introduction to Monte Carlo Ray Tracing, OpenCL Implementation (CEDEC 2014)
Introduction to Monte Carlo Ray Tracing, OpenCL Implementation (CEDEC 2014)
 
Game Programming 12 - Shaders
Game Programming 12 - ShadersGame Programming 12 - Shaders
Game Programming 12 - Shaders
 
GPGPU Programming @DroidconNL 2012 by Alten
GPGPU Programming @DroidconNL 2012 by AltenGPGPU Programming @DroidconNL 2012 by Alten
GPGPU Programming @DroidconNL 2012 by Alten
 
Holy smoke! Faster Particle Rendering using Direct Compute by Gareth Thomas
Holy smoke! Faster Particle Rendering using Direct Compute by Gareth ThomasHoly smoke! Faster Particle Rendering using Direct Compute by Gareth Thomas
Holy smoke! Faster Particle Rendering using Direct Compute by Gareth Thomas
 
Shadow Volumes on Programmable Graphics Hardware
Shadow Volumes on Programmable Graphics HardwareShadow Volumes on Programmable Graphics Hardware
Shadow Volumes on Programmable Graphics Hardware
 
How to Use OpenGL/ES on Native Activity
How to Use OpenGL/ES on Native ActivityHow to Use OpenGL/ES on Native Activity
How to Use OpenGL/ES on Native Activity
 
NIR on the Mesa i965 backend (FOSDEM 2016)
NIR on the Mesa i965 backend (FOSDEM 2016)NIR on the Mesa i965 backend (FOSDEM 2016)
NIR on the Mesa i965 backend (FOSDEM 2016)
 
Graph x pregel
Graph x pregelGraph x pregel
Graph x pregel
 
GraphX and Pregel - Apache Spark
GraphX and Pregel - Apache SparkGraphX and Pregel - Apache Spark
GraphX and Pregel - Apache Spark
 
201707 SER332 Lecture 05
201707 SER332 Lecture 05   201707 SER332 Lecture 05
201707 SER332 Lecture 05
 
Advanced Cartographic Map Rendering In GeoServer
Advanced Cartographic Map Rendering In GeoServerAdvanced Cartographic Map Rendering In GeoServer
Advanced Cartographic Map Rendering In GeoServer
 
201707 SER332 Lecture 06
201707 SER332 Lecture 06 201707 SER332 Lecture 06
201707 SER332 Lecture 06
 
Vertex Shader Tricks by Bill Bilodeau - AMD at GDC14
Vertex Shader Tricks by Bill Bilodeau - AMD at GDC14Vertex Shader Tricks by Bill Bilodeau - AMD at GDC14
Vertex Shader Tricks by Bill Bilodeau - AMD at GDC14
 
201707 SER332 Lecture 07
201707 SER332 Lecture 07   201707 SER332 Lecture 07
201707 SER332 Lecture 07
 
SRv6 Mobile User Plane P4 proto-type
SRv6 Mobile User Plane P4 proto-typeSRv6 Mobile User Plane P4 proto-type
SRv6 Mobile User Plane P4 proto-type
 
Custom SRP and graphics workflows - Unite Copenhagen 2019
Custom SRP and graphics workflows - Unite Copenhagen 2019Custom SRP and graphics workflows - Unite Copenhagen 2019
Custom SRP and graphics workflows - Unite Copenhagen 2019
 
OpenGL 4.5 Update for NVIDIA GPUs
OpenGL 4.5 Update for NVIDIA GPUsOpenGL 4.5 Update for NVIDIA GPUs
OpenGL 4.5 Update for NVIDIA GPUs
 
Beginning direct3d gameprogramming09_shaderprogramming_20160505_jintaeks
Beginning direct3d gameprogramming09_shaderprogramming_20160505_jintaeksBeginning direct3d gameprogramming09_shaderprogramming_20160505_jintaeks
Beginning direct3d gameprogramming09_shaderprogramming_20160505_jintaeks
 
201707 SER332 Lecture 03
201707 SER332 Lecture 03   201707 SER332 Lecture 03
201707 SER332 Lecture 03
 

More from AMD Developer Central

DX12 & Vulkan: Dawn of a New Generation of Graphics APIs
DX12 & Vulkan: Dawn of a New Generation of Graphics APIsDX12 & Vulkan: Dawn of a New Generation of Graphics APIs
DX12 & Vulkan: Dawn of a New Generation of Graphics APIs
AMD Developer Central
 
Leverage the Speed of OpenCL™ with AMD Math Libraries
Leverage the Speed of OpenCL™ with AMD Math LibrariesLeverage the Speed of OpenCL™ with AMD Math Libraries
Leverage the Speed of OpenCL™ with AMD Math Libraries
AMD Developer Central
 
Introduction to Node.js
Introduction to Node.jsIntroduction to Node.js
Introduction to Node.js
AMD Developer Central
 
Media SDK Webinar 2014
Media SDK Webinar 2014Media SDK Webinar 2014
Media SDK Webinar 2014
AMD Developer Central
 
An Introduction to OpenCL™ Programming with AMD GPUs - AMD & Acceleware Webinar
An Introduction to OpenCL™ Programming with AMD GPUs - AMD & Acceleware WebinarAn Introduction to OpenCL™ Programming with AMD GPUs - AMD & Acceleware Webinar
An Introduction to OpenCL™ Programming with AMD GPUs - AMD & Acceleware Webinar
AMD Developer Central
 
DirectGMA on AMD’S FirePro™ GPUS
DirectGMA on AMD’S  FirePro™ GPUSDirectGMA on AMD’S  FirePro™ GPUS
DirectGMA on AMD’S FirePro™ GPUS
AMD Developer Central
 
Webinar: Whats New in Java 8 with Develop Intelligence
Webinar: Whats New in Java 8 with Develop IntelligenceWebinar: Whats New in Java 8 with Develop Intelligence
Webinar: Whats New in Java 8 with Develop Intelligence
AMD Developer Central
 
The Small Batch (and other) solutions in Mantle API, by Guennadi Riguer, Mant...
The Small Batch (and other) solutions in Mantle API, by Guennadi Riguer, Mant...The Small Batch (and other) solutions in Mantle API, by Guennadi Riguer, Mant...
The Small Batch (and other) solutions in Mantle API, by Guennadi Riguer, Mant...
AMD Developer Central
 
Inside XBox- One, by Martin Fuller
Inside XBox- One, by Martin FullerInside XBox- One, by Martin Fuller
Inside XBox- One, by Martin Fuller
AMD Developer Central
 
Rendering Battlefield 4 with Mantle by Yuriy ODonnell
Rendering Battlefield 4 with Mantle by Yuriy ODonnellRendering Battlefield 4 with Mantle by Yuriy ODonnell
Rendering Battlefield 4 with Mantle by Yuriy ODonnell
AMD Developer Central
 
Low-level Shader Optimization for Next-Gen and DX11 by Emil Persson
Low-level Shader Optimization for Next-Gen and DX11 by Emil PerssonLow-level Shader Optimization for Next-Gen and DX11 by Emil Persson
Low-level Shader Optimization for Next-Gen and DX11 by Emil Persson
AMD Developer Central
 
Inside XBOX ONE by Martin Fuller
Inside XBOX ONE by Martin FullerInside XBOX ONE by Martin Fuller
Inside XBOX ONE by Martin Fuller
AMD Developer Central
 
Introduction to Direct 3D 12 by Ivan Nevraev
Introduction to Direct 3D 12 by Ivan NevraevIntroduction to Direct 3D 12 by Ivan Nevraev
Introduction to Direct 3D 12 by Ivan Nevraev
AMD Developer Central
 
Computer Vision Powered by Heterogeneous System Architecture (HSA) by Dr. Ha...
Computer Vision Powered by Heterogeneous System Architecture (HSA) by  Dr. Ha...Computer Vision Powered by Heterogeneous System Architecture (HSA) by  Dr. Ha...
Computer Vision Powered by Heterogeneous System Architecture (HSA) by Dr. Ha...
AMD Developer Central
 
Productive OpenCL Programming An Introduction to OpenCL Libraries with Array...
Productive OpenCL Programming An Introduction to OpenCL Libraries  with Array...Productive OpenCL Programming An Introduction to OpenCL Libraries  with Array...
Productive OpenCL Programming An Introduction to OpenCL Libraries with Array...
AMD Developer Central
 
Rendering Battlefield 4 with Mantle by Johan Andersson - AMD at GDC14
Rendering Battlefield 4 with Mantle by Johan Andersson - AMD at GDC14Rendering Battlefield 4 with Mantle by Johan Andersson - AMD at GDC14
Rendering Battlefield 4 with Mantle by Johan Andersson - AMD at GDC14
AMD Developer Central
 
RapidFire - the Easy Route to low Latency Cloud Gaming Solutions - AMD at GDC14
RapidFire - the Easy Route to low Latency Cloud Gaming Solutions - AMD at GDC14RapidFire - the Easy Route to low Latency Cloud Gaming Solutions - AMD at GDC14
RapidFire - the Easy Route to low Latency Cloud Gaming Solutions - AMD at GDC14
AMD Developer Central
 
Mantle and Nitrous - Combining Efficient Engine Design with a modern API - AM...
Mantle and Nitrous - Combining Efficient Engine Design with a modern API - AM...Mantle and Nitrous - Combining Efficient Engine Design with a modern API - AM...
Mantle and Nitrous - Combining Efficient Engine Design with a modern API - AM...
AMD Developer Central
 
Mantle - Introducing a new API for Graphics - AMD at GDC14
Mantle - Introducing a new API for Graphics - AMD at GDC14Mantle - Introducing a new API for Graphics - AMD at GDC14
Mantle - Introducing a new API for Graphics - AMD at GDC14
AMD Developer Central
 
Direct3D and the Future of Graphics APIs - AMD at GDC14
Direct3D and the Future of Graphics APIs - AMD at GDC14Direct3D and the Future of Graphics APIs - AMD at GDC14
Direct3D and the Future of Graphics APIs - AMD at GDC14
AMD Developer Central
 

More from AMD Developer Central (20)

DX12 & Vulkan: Dawn of a New Generation of Graphics APIs
DX12 & Vulkan: Dawn of a New Generation of Graphics APIsDX12 & Vulkan: Dawn of a New Generation of Graphics APIs
DX12 & Vulkan: Dawn of a New Generation of Graphics APIs
 
Leverage the Speed of OpenCL™ with AMD Math Libraries
Leverage the Speed of OpenCL™ with AMD Math LibrariesLeverage the Speed of OpenCL™ with AMD Math Libraries
Leverage the Speed of OpenCL™ with AMD Math Libraries
 
Introduction to Node.js
Introduction to Node.jsIntroduction to Node.js
Introduction to Node.js
 
Media SDK Webinar 2014
Media SDK Webinar 2014Media SDK Webinar 2014
Media SDK Webinar 2014
 
An Introduction to OpenCL™ Programming with AMD GPUs - AMD & Acceleware Webinar
An Introduction to OpenCL™ Programming with AMD GPUs - AMD & Acceleware WebinarAn Introduction to OpenCL™ Programming with AMD GPUs - AMD & Acceleware Webinar
An Introduction to OpenCL™ Programming with AMD GPUs - AMD & Acceleware Webinar
 
DirectGMA on AMD’S FirePro™ GPUS
DirectGMA on AMD’S  FirePro™ GPUSDirectGMA on AMD’S  FirePro™ GPUS
DirectGMA on AMD’S FirePro™ GPUS
 
Webinar: Whats New in Java 8 with Develop Intelligence
Webinar: Whats New in Java 8 with Develop IntelligenceWebinar: Whats New in Java 8 with Develop Intelligence
Webinar: Whats New in Java 8 with Develop Intelligence
 
The Small Batch (and other) solutions in Mantle API, by Guennadi Riguer, Mant...
The Small Batch (and other) solutions in Mantle API, by Guennadi Riguer, Mant...The Small Batch (and other) solutions in Mantle API, by Guennadi Riguer, Mant...
The Small Batch (and other) solutions in Mantle API, by Guennadi Riguer, Mant...
 
Inside XBox- One, by Martin Fuller
Inside XBox- One, by Martin FullerInside XBox- One, by Martin Fuller
Inside XBox- One, by Martin Fuller
 
Rendering Battlefield 4 with Mantle by Yuriy ODonnell
Rendering Battlefield 4 with Mantle by Yuriy ODonnellRendering Battlefield 4 with Mantle by Yuriy ODonnell
Rendering Battlefield 4 with Mantle by Yuriy ODonnell
 
Low-level Shader Optimization for Next-Gen and DX11 by Emil Persson
Low-level Shader Optimization for Next-Gen and DX11 by Emil PerssonLow-level Shader Optimization for Next-Gen and DX11 by Emil Persson
Low-level Shader Optimization for Next-Gen and DX11 by Emil Persson
 
Inside XBOX ONE by Martin Fuller
Inside XBOX ONE by Martin FullerInside XBOX ONE by Martin Fuller
Inside XBOX ONE by Martin Fuller
 
Introduction to Direct 3D 12 by Ivan Nevraev
Introduction to Direct 3D 12 by Ivan NevraevIntroduction to Direct 3D 12 by Ivan Nevraev
Introduction to Direct 3D 12 by Ivan Nevraev
 
Computer Vision Powered by Heterogeneous System Architecture (HSA) by Dr. Ha...
Computer Vision Powered by Heterogeneous System Architecture (HSA) by  Dr. Ha...Computer Vision Powered by Heterogeneous System Architecture (HSA) by  Dr. Ha...
Computer Vision Powered by Heterogeneous System Architecture (HSA) by Dr. Ha...
 
Productive OpenCL Programming An Introduction to OpenCL Libraries with Array...
Productive OpenCL Programming An Introduction to OpenCL Libraries  with Array...Productive OpenCL Programming An Introduction to OpenCL Libraries  with Array...
Productive OpenCL Programming An Introduction to OpenCL Libraries with Array...
 
Rendering Battlefield 4 with Mantle by Johan Andersson - AMD at GDC14
Rendering Battlefield 4 with Mantle by Johan Andersson - AMD at GDC14Rendering Battlefield 4 with Mantle by Johan Andersson - AMD at GDC14
Rendering Battlefield 4 with Mantle by Johan Andersson - AMD at GDC14
 
RapidFire - the Easy Route to low Latency Cloud Gaming Solutions - AMD at GDC14
RapidFire - the Easy Route to low Latency Cloud Gaming Solutions - AMD at GDC14RapidFire - the Easy Route to low Latency Cloud Gaming Solutions - AMD at GDC14
RapidFire - the Easy Route to low Latency Cloud Gaming Solutions - AMD at GDC14
 
Mantle and Nitrous - Combining Efficient Engine Design with a modern API - AM...
Mantle and Nitrous - Combining Efficient Engine Design with a modern API - AM...Mantle and Nitrous - Combining Efficient Engine Design with a modern API - AM...
Mantle and Nitrous - Combining Efficient Engine Design with a modern API - AM...
 
Mantle - Introducing a new API for Graphics - AMD at GDC14
Mantle - Introducing a new API for Graphics - AMD at GDC14Mantle - Introducing a new API for Graphics - AMD at GDC14
Mantle - Introducing a new API for Graphics - AMD at GDC14
 
Direct3D and the Future of Graphics APIs - AMD at GDC14
Direct3D and the Future of Graphics APIs - AMD at GDC14Direct3D and the Future of Graphics APIs - AMD at GDC14
Direct3D and the Future of Graphics APIs - AMD at GDC14
 

Recently uploaded

Transcript: Selling digital books in 2024: Insights from industry leaders - T...
Transcript: Selling digital books in 2024: Insights from industry leaders - T...Transcript: Selling digital books in 2024: Insights from industry leaders - T...
Transcript: Selling digital books in 2024: Insights from industry leaders - T...
BookNet Canada
 
GraphSummit Singapore | The Art of the Possible with Graph - Q2 2024
GraphSummit Singapore | The Art of the  Possible with Graph - Q2 2024GraphSummit Singapore | The Art of the  Possible with Graph - Q2 2024
GraphSummit Singapore | The Art of the Possible with Graph - Q2 2024
Neo4j
 
Elizabeth Buie - Older adults: Are we really designing for our future selves?
Elizabeth Buie - Older adults: Are we really designing for our future selves?Elizabeth Buie - Older adults: Are we really designing for our future selves?
Elizabeth Buie - Older adults: Are we really designing for our future selves?
Nexer Digital
 
State of ICS and IoT Cyber Threat Landscape Report 2024 preview
State of ICS and IoT Cyber Threat Landscape Report 2024 previewState of ICS and IoT Cyber Threat Landscape Report 2024 preview
State of ICS and IoT Cyber Threat Landscape Report 2024 preview
Prayukth K V
 
Microsoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdfMicrosoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdf
Uni Systems S.M.S.A.
 
The Future of Platform Engineering
The Future of Platform EngineeringThe Future of Platform Engineering
The Future of Platform Engineering
Jemma Hussein Allen
 
DevOps and Testing slides at DASA Connect
DevOps and Testing slides at DASA ConnectDevOps and Testing slides at DASA Connect
DevOps and Testing slides at DASA Connect
Kari Kakkonen
 
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
Neo4j
 
Pushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 daysPushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 days
Adtran
 
Generative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to ProductionGenerative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to Production
Aggregage
 
SAP Sapphire 2024 - ASUG301 building better apps with SAP Fiori.pdf
SAP Sapphire 2024 - ASUG301 building better apps with SAP Fiori.pdfSAP Sapphire 2024 - ASUG301 building better apps with SAP Fiori.pdf
SAP Sapphire 2024 - ASUG301 building better apps with SAP Fiori.pdf
Peter Spielvogel
 
Monitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR EventsMonitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR Events
Ana-Maria Mihalceanu
 
UiPath Test Automation using UiPath Test Suite series, part 5
UiPath Test Automation using UiPath Test Suite series, part 5UiPath Test Automation using UiPath Test Suite series, part 5
UiPath Test Automation using UiPath Test Suite series, part 5
DianaGray10
 
Secstrike : Reverse Engineering & Pwnable tools for CTF.pptx
Secstrike : Reverse Engineering & Pwnable tools for CTF.pptxSecstrike : Reverse Engineering & Pwnable tools for CTF.pptx
Secstrike : Reverse Engineering & Pwnable tools for CTF.pptx
nkrafacyberclub
 
Encryption in Microsoft 365 - ExpertsLive Netherlands 2024
Encryption in Microsoft 365 - ExpertsLive Netherlands 2024Encryption in Microsoft 365 - ExpertsLive Netherlands 2024
Encryption in Microsoft 365 - ExpertsLive Netherlands 2024
Albert Hoitingh
 
20240607 QFM018 Elixir Reading List May 2024
20240607 QFM018 Elixir Reading List May 202420240607 QFM018 Elixir Reading List May 2024
20240607 QFM018 Elixir Reading List May 2024
Matthew Sinclair
 
National Security Agency - NSA mobile device best practices
National Security Agency - NSA mobile device best practicesNational Security Agency - NSA mobile device best practices
National Security Agency - NSA mobile device best practices
Quotidiano Piemontese
 
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
James Anderson
 
Communications Mining Series - Zero to Hero - Session 1
Communications Mining Series - Zero to Hero - Session 1Communications Mining Series - Zero to Hero - Session 1
Communications Mining Series - Zero to Hero - Session 1
DianaGray10
 
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdfObservability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Paige Cruz
 

Recently uploaded (20)

Transcript: Selling digital books in 2024: Insights from industry leaders - T...
Transcript: Selling digital books in 2024: Insights from industry leaders - T...Transcript: Selling digital books in 2024: Insights from industry leaders - T...
Transcript: Selling digital books in 2024: Insights from industry leaders - T...
 
GraphSummit Singapore | The Art of the Possible with Graph - Q2 2024
GraphSummit Singapore | The Art of the  Possible with Graph - Q2 2024GraphSummit Singapore | The Art of the  Possible with Graph - Q2 2024
GraphSummit Singapore | The Art of the Possible with Graph - Q2 2024
 
Elizabeth Buie - Older adults: Are we really designing for our future selves?
Elizabeth Buie - Older adults: Are we really designing for our future selves?Elizabeth Buie - Older adults: Are we really designing for our future selves?
Elizabeth Buie - Older adults: Are we really designing for our future selves?
 
State of ICS and IoT Cyber Threat Landscape Report 2024 preview
State of ICS and IoT Cyber Threat Landscape Report 2024 previewState of ICS and IoT Cyber Threat Landscape Report 2024 preview
State of ICS and IoT Cyber Threat Landscape Report 2024 preview
 
Microsoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdfMicrosoft - Power Platform_G.Aspiotis.pdf
Microsoft - Power Platform_G.Aspiotis.pdf
 
The Future of Platform Engineering
The Future of Platform EngineeringThe Future of Platform Engineering
The Future of Platform Engineering
 
DevOps and Testing slides at DASA Connect
DevOps and Testing slides at DASA ConnectDevOps and Testing slides at DASA Connect
DevOps and Testing slides at DASA Connect
 
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
GraphSummit Singapore | Graphing Success: Revolutionising Organisational Stru...
 
Pushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 daysPushing the limits of ePRTC: 100ns holdover for 100 days
Pushing the limits of ePRTC: 100ns holdover for 100 days
 
Generative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to ProductionGenerative AI Deep Dive: Advancing from Proof of Concept to Production
Generative AI Deep Dive: Advancing from Proof of Concept to Production
 
SAP Sapphire 2024 - ASUG301 building better apps with SAP Fiori.pdf
SAP Sapphire 2024 - ASUG301 building better apps with SAP Fiori.pdfSAP Sapphire 2024 - ASUG301 building better apps with SAP Fiori.pdf
SAP Sapphire 2024 - ASUG301 building better apps with SAP Fiori.pdf
 
Monitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR EventsMonitoring Java Application Security with JDK Tools and JFR Events
Monitoring Java Application Security with JDK Tools and JFR Events
 
UiPath Test Automation using UiPath Test Suite series, part 5
UiPath Test Automation using UiPath Test Suite series, part 5UiPath Test Automation using UiPath Test Suite series, part 5
UiPath Test Automation using UiPath Test Suite series, part 5
 
Secstrike : Reverse Engineering & Pwnable tools for CTF.pptx
Secstrike : Reverse Engineering & Pwnable tools for CTF.pptxSecstrike : Reverse Engineering & Pwnable tools for CTF.pptx
Secstrike : Reverse Engineering & Pwnable tools for CTF.pptx
 
Encryption in Microsoft 365 - ExpertsLive Netherlands 2024
Encryption in Microsoft 365 - ExpertsLive Netherlands 2024Encryption in Microsoft 365 - ExpertsLive Netherlands 2024
Encryption in Microsoft 365 - ExpertsLive Netherlands 2024
 
20240607 QFM018 Elixir Reading List May 2024
20240607 QFM018 Elixir Reading List May 202420240607 QFM018 Elixir Reading List May 2024
20240607 QFM018 Elixir Reading List May 2024
 
National Security Agency - NSA mobile device best practices
National Security Agency - NSA mobile device best practicesNational Security Agency - NSA mobile device best practices
National Security Agency - NSA mobile device best practices
 
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
Alt. GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using ...
 
Communications Mining Series - Zero to Hero - Session 1
Communications Mining Series - Zero to Hero - Session 1Communications Mining Series - Zero to Hero - Session 1
Communications Mining Series - Zero to Hero - Session 1
 
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdfObservability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
 

PG-4034, Using OpenGL and DirectX for Heterogeneous Compute, by Karl Hillesland

  • 1. USING  OPENGL  AND  DIRECTX  FOR   HETEROGENEOUS  COMPUTE   KARL  HILLESLAND  
  • 2. AGENDA   THE  GRAPHICS  PIPELINE   PROGRAMMING  THE  GPU   FEEDING  THE  GPU   2   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  • 4. GRAPHICS  PIPELINE   SHADER  CENTRIC   OpenGL   DirectX     Vertex  Shader     Vertex  Shader   Tessellation  Control  Shader   Tessellation  Evaluation  Shader   Geometry  Shader   Rasterizer   Fragment  Shader   Per-Fragment  Operations   Tessellation  Primitive  Generator 4   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL     Input  Assembler Vertex  Puller   Tessellator   Hull  Shader   Domain  Shader   Geometry  Shader   Rasterizer   Pixel  Shader   Output  Merger
  • 5. GRAPHICS  PIPELINE   MORE  DETAILS   indices,   vertices     Input  Assembler Thread  per  DS  vertex  (n3)   Barycentric   Domain  Shader   DS  vertex   Collects  prims   vertex       Tessellator Patch  verts  n2       Prim  verts   Geometry  Shader 5   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Prims   Collects  Patches Patch  verts  n1   Tess   factors   Collects  patches   vertex   Vertex  Shader Thread   per  vertex     Patch  Constant   Hull  Shader Control  point     Primitive  Assembler Thread  per  output   control  point  n2     Next  Slide
  • 6. prim   Hi-Z/Stencil  info       Rasterizer  2   Unroller Rasterizer  1 Hi-Z/Stencil Unrolling,   Masking   Pixel  Shader     Reordering Depth/Stencil         Blending Not  shown:  Any  shader  stage  can  read/write  to  memory,   including  atomics,  filtering*,  decompression,  and  sRGB   conversion     Collects  Quads Conversion 6   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Early-Z/Stencil  
  • 7. WHAT’S  THE  POINT?   !  The  Graphics  pipeline  has  a  lot  more  parts   ‒  Reorganizes  threads   ‒  Tracks  dependencies   ‒  Reorders   ‒  Extra  fixed-function  units   !  Are  they  usable?   7   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  • 8. GRAPHICS  IN  THE  NINETIES       Input  Assembler   Transform  and  Lighting   Rasterizer   Texturing  and  Fog   Output  Merger 8   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  • 9. VORONOI  DIAGRAMS   GPGPU  WITHOUT  SHADERS   !  Color  according  to  closest   ‒  Point   ‒  Line   !  Could  be  weighted   !  Useful  for     ‒  Collision  Detection   ‒  Surface  Reconstruction   ‒  Robot  Motion  Planning   ‒  Non-Photorealistic  Rendering   ‒  Surface  Simplification   ‒  Mesh  Generation   9   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  • 10. VORONOI  DIAGRAMS  IN  THE  NINETIES   Simply  rasterize  the   cones  using  graphics   hardware   Haeberli90,  Woo97   10   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   2-part  discrete  Voronoi   diagram  representation   Color  Buffer   Site  IDs   Depth  Buffer   Distance  
  • 11. OPENGL  1  SIMD  MACHINE   PEERCY,  ET.  AL.  SIGGRAPH  2000   SIMD  Concept   OpenGL  1  SIMD   Instruction   OpenGL  call  (CPU)   SIMD  Lane   Pixel   SIMD  Lane  Input  Data   Texel   SIMD  Lane  Output  Data   Fragment   ALU   Blend  Operation   Conditionals   Alpha  and  Stencil  Tests   11   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   float y; float4 contrived_example() { float x = f(u,v) if( x*y > 0) { x = x + g(u,v) } return x*h(u,v); }
  • 12. USING  EARLY-Z  OR  STENCIL   Texture-space  blur   With  back-face  culling   Applications  of  Explicit  Early-Z  Culling,  Real-Time  Shading  Course,  Siggraph  2004.   12   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Pressure  buffer   used  for  sim  culling  
  • 13. What’s  the  Point?   The  graphics  pipeline     gives  you  access  to  more   13   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  • 15. SHADER  TYPES   !  Compute  (4.3)   !  Vertex  (2,  ES  2)   !  Tessellation  Control  (4)   !  Tessellation  Evaluation  (4)     !  Geometry  (3)   !  Fragment  (2,  ES  2)   OpenGL   15   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   !  !  !  !  !  !  Compute  (11)   Vertex  (8)   Hull  (11)   Domain  (11)   Geometry  (10)   Pixel  (9)   D3D   15  
  • 16. BASIC  GLSL  VERTEX  SHADER   #version 430 in vec3 Position; in vec2 UV; out PosUV //Not available in GLES { vec3 vPositionWS; vec2 vUV; } vs_output; uniform mat4x4 mMVP; uniform mat4x4 mM; void main(void) { gl_Position = mMVP * vec4(Position, 1.0); vs_output.vPositionWS = mM * vec4(Position, 1.0); vs_output.vUV = UV; } 16   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   16  
  • 17. BASIC  GLSL  PIXEL  SHADER   in fsInput //Not available in GLES { vec3 vPositionWS; vec2 vUV; } fs_input; uniform sampler2D sDiffuse; out vec4 color_out; void main(void) { color_out = texture( sDiffuse, fs_input.vUV ); } 17   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   17  
  • 18. BASIC  HLSL  VERTEX  SHADER   struct PosUV //Not available in GLES { float4 vPositionSS : SV_POSITION; float3 vPositionWS : POSITION; float2 vUV : TEXCOORD0; }; float4x4 mMVP; float4x4 mM; PosUV main( float3 Position : POSITION, float2 UV: TEXCOORD0) { PosUV vs_output; vs_output.vPositionSS = mMVP * float4(Position, 1.0); vs_output.vPositionWS = mM * float4(Position, 1.0); vs_output.vUV = UV; return vs_output; } 18   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   18  
  • 19. BASIC  HLSL  PIXEL  SHADER   struct fsInput { float3 vPositionWS : POSITION; float2 vUV : TEXCOORD0; }; sampler sWrapTriLin; texture2D <float4> tDiffuse; float4 main(fsInput i) : SV_TARGET { return tDiffuse.Sample(sWrapTriLin, i.vUV); } 19   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   19  
  • 20. BASIC  GEOMETRY  SHADER   layout (triangles) in; layout (triangle_strip, max_vertices = 3) out; void main(void) { for(int i=0; i < gl_in.length(); i++) { gl_Position = gl_in[i].gl_Position; EmitVertex(); } EndPrimitive(); } 20   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   20  
  • 21. TESSELLATION   Tessellation  Control   Hull  Shader   Patch  Constant  Func   Tess  factors   Tess  factors   Tessellator   Tessellator   Topology   Topology   Tessellation   Evaluation   OpenGL  4.0   21   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Domain   Shader   D3D11   21  
  • 22. TESSELLATION   // Tessellation Control layout (vertices = 4) out; void TCS(void) { if (gl_InvocationID == 0) { gl_TessLevelInner[0] = 2.0; … // Hull Shader [outputcontrolpoints(4)] [patchconstantfunc("ConstantsHS")] [domain("quad")] [partitioning("integer")] [outputtopology("triangle_cw")] // Tessellation Evaluation layout (quads, cw, equal_spacing) in void TES(void) { … HS_OUTPUT HullShader(…) // Domain Shader DS_OUTPUT DomainShader(…) OpenGL  4.0   22   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   D3D11   22  
  • 23. TESSELLATION  CONTROL   out patch float tessFactor; void main(void) { if (gl_InvocationID == 0) Tessellation  rate  can  be  set  by  any   instance   { gl_TessLevelInner[0] = 2.0; … tessFactor = 2.0; } Values  can  be   communicated  across   threads   barrier(); DoSomeWork(tessFactor, gl_InvocationID); 23   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   23  
  • 24. COMPUTE  SHADERS   Thread Group Thread Thread group size y Thread global size y global size x Thread group size x !  Groups  can  share  local  memory   !  Threads  can  be  synced  at  a  group  level   24   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   24  
  • 25. OPENGL  COMPUTE   buffer BlockName { int linearOutput[] }; shared int var; layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) void ContrivedSample() { const uvec3 localIdx = gl_LocalInvocationID; const uvec3 globalIdx = gl_GlobalInvocationID; const uvec3 groupIdx = gl_WorkGroupID; if(localIdx.x == 0) var = groupIdx.x; barrier(); linearOutput[globalIdx.x] = var; } 25   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   25  
  • 26. DIRECT  COMPUTE   RWStructuredBuffer<int> linearOutput; groupshared int var; [numthreads(64, 1, 1)] void ContrivedSample( uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID ) { if(localIdx.x == 0) var = groupIdx.x; GroupMemoryBarrierWithGroupSync(); linearOutput[globalIdx.x] = var; } 26   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   26  
  • 27. PROGRAMMING  THE  GPU   SYNCHRONIZATION  
  • 28. MEMORY  COHERENCE  -  GL  /  DX   Dispatch   CS   28   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Mem   CS   28  
  • 29. MEMORY  COHERENCE  -  GL  /  DX  11.1   Draw   VS   Mem   GS   VS   GS   FS   FS   RT   29   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   29  
  • 30. MEMORY  COHERENCE  -  GL  /  DX  11.1   Draw   VS   Mem   GS   FS   RT   30   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   30  
  • 32. DRIVER  STACKS  (WINDOWS)    OpenGL  App   DirectX  App   OpenGL32.dll   D3D11.dll   D3D  UMD   OpenGL  ICD   DXGI   KMD   32   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   32  
  • 33. DRIVER  STACKS  (LINUX)   App   libGL   Gallium3D   State  tracker   DRI   Or   Hardware  layer   Gallium3D   WinSys   libDRM-­‐radeon   drm   33   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   33  
  • 34. FEEDING  THE  GPU   GPU-CPU  SYNCHRONIZATION  
  • 35. DRIVER  COMMAND  QUEUE   Application   Dr   5   Da   5   Da  1   Dr  1   Da     6   Da  2   Dr   6   Dr  2   Da  3   Dr  3   Da  4   Dr  4   Da  5   Dr  5   Da  6   Dr  6   Driver/GPU   Time   Reorder  possible?   35   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   35  
  • 36. CPU/GPU  MEMORY  SYNCHRONIZATION   BY  DRIVER   App   Memory   Driver   Copy   App   Memory   Driver   Copy   Hints   36   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   GPU   Read   Driver   Copy   GPU   Read   Stream,  Static,  Dynamic   Draw,  Read,  Copy  
  • 37. CPU/GPU  MEMORY  SYNCHRONIZATION   MANUAL   App   Memory   Da  1   Dr  1   Driver   Copy   App  Copy   Da  2   Dr  2   Fence   37   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Da  3   Dr  3   Da  4   Dr  4   Da  5   GPU   Read   Dr  5   Da  6   Dr  6  
  • 38. FEEDING  THE  GPU   DATA  
  • 39. LEGACY  OPENGL  OBJECT  MODEL   !  glGenBuffers,  glGenTextures,  glGenSamplers,  …   ‒  Creates  name  /  handle   !  glBindBuffer,  glBindTexture,     ‒  Sets  as  current   !  glBufferData,  glTexSubImage,  glMapBuffer   ‒  Supplies  data   39   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   39  
  • 40. BUFFER  BINDING  AND  CREATION   glBindBuffer(target,name)   Target   binding   BufferObject   State,  Usage   BufferData   desc.BindFlags  =  <Target>   pDevice->CreateBuffer(desc,…)   40   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   40  
  • 41. SETTING  DATA  (SIMPLEST  OPTION)   glBufferData   (target,  size,  pData,  usage)   data   Target   binding   BufferObject   desc.Usage  =  <Usage>   desc.CPUAccessFlags  =  <RWUsage>   pDevice->CreateBuffer(desc,pData,)   41   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   41  
  • 42. BUFFER  TARGETS   GL  Name   Typical  Purpose     DX  Equivalent   ARRAY   Vertices   VERTEX   ELEMENT_ARRAY   Indices   INDEX   UNIFORM   Read-only  vars   CONSTANT   TEXTURE_BUFFER   Buffer-as-texture   CONSTANT  (tbuffer)   SHADER_STORAGE   Read/write   SHADER_RESOURCE   TRANSFORM_FEEDBACK   Stream  out   Stream  out   DRAW_INDIRECT   indirect  draw   DRAWINDIRECT   ATOMIC_COUNTER   Global  counter  var   UAV_FLAG_COUNTER   COPY_READ,  _WRITE   Copying  (optional)   Staging?   PIXEL_PACK,  _UNPACK   GPU  <->  CPU   Staging?   42   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   42  
  • 43. DIRECTX  OBJECTS  AND  VIEWS   !  Resource  (base  class)   ‒  Usage:  default,  immutable,  dynamic,  staging   ‒  Bind  flags:  vertex,  index,  shader  resource,  …   !  Buffer   !  Texture2D,  …   !  DepthStencilView   !  RenderTargetView   !  ShaderResourceView   !  UnorderedAccessView   43   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   43  
  • 44. OBJECT  AND  VIEW  EXAMPLE   D3D11_BUFFER_DESC desc; desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; … pDevice->CreateBuffer(&desc, data, &pBuffer); D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc; srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; … pDevice->CreateShaderResourceView(pBuffer, &srvDesc, &pView); //at draw time pContext->VSSetShaderResources(0, 1, pView); 44   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   44  
  • 45. DATA  TYPES   Image   45   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   Linear  
  • 46. IMMUTABLE  TEXTURES  (4.2,  GLES  3)   glGenTextures(1, &texObjName); glBindTexture(GL_TEXTURE_2D_ARRAY, texObjName); glTexStorage3D(GL_TEXTURE_2D_ARRAY, level, internalformat, width, height, depth); glTexSubImage3D(GL_TEXTURE_2D_ARRAY, 0,0,0, width, height, depth, format, type, pData);   CreateTexture2D( desc, srcDataLayout, pData); 46   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   46  
  • 47. FEEDING  THE  GPU   PROGRAMS  
  • 48. SHADER  MANAGEMENT  -  OPENGL   Program  Object   GLuint shader = glCreateShader(GL_VERTEX_SHADER); Vertex  Shader   glShaderSource(…); glCompileShader(); Pixel  Shader   GLuint program = glCreateProgram(); glAttachShader(program, shader); glLinkProgram(program); glUseProgram(program);       48   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   48  
  • 49. BASIC  GLSL  PIXEL  SHADER   in fsInput //Not available in GLES { vec3 vPositionWS; vec2 vUV; } fs_input; uniform sampler2D sDiffuse; out vec4 color_out; void main(void) { color_out = texture( sDiffuse, fs_input.vUV ); } 49   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   49  
  • 50. BASIC  GLSL  VERTEX  SHADER   #version 430 in vec3 Position; in vec2 UV; out PosUV //Not available in GLES { vec3 vPositionWS; vec2 vUV; } vs_output; uniform mat4x4 mMVP; uniform mat4x4 mM; void main(void) { gl_Position = mMVP * vec4(Position, 1.0); vs_output.vPositionWS = mM * vec4(Position, 1.0); vs_output.vUV = UV; } 50   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   50  
  • 51. SHADER  MANAGEMENT  -  DX   D3DCompile(source,..,vs_5_0,..,&pByteCode) pShader = CreateVertexShader(pByteCode); VSSetShader(pShader,0,0); !  No  program  /  link  concept  in  API   51   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   51  
  • 52. PROGRAM  BINARIES   OpenGL   glGetProgramBinary(program,…,format,pBinaryOut); DirectX   D3DCompile(source,..,vs_5_0,..,&pByteCode)   !  Program  level   !  Shader  level   !  In  theory:  format  choices   !  Portable  byte  code   !  In  practice:  somewhat  final,  non-portable   52   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   52  
  • 53. DRAW  CALLS   OpenGL   D3D   glDrawArrays   Draw   glDrawArraysInstanced   DrawInstanced(…,0)   glDrawArraysInstancedBaseInstance   DrawInstanced   glDrawArraysIndirect   DrawInstancedIndirect   glMultiDrawArrays   for(int  i=0;  i<n;  ++i)        Draw(count[i],  start[i]);   glMultiDrawArraysIndirect   for(int  i=0;  i<n;  ++i)        DrawInstancedIndirect(…)   glDrawElements   DrawIndexed   …And  so  forth   53   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   53  
  • 54. COMPUTE  SHADERS   glDispatchCompute(nGroupsX,nGroupsY,nGroupsZ)   Dispatch(nGroupsX,nGroupsY,nGroupsZ )   glDispatchComputeIndirect(offset)   DispatchIndirect(pResource,offset)   OpenGL  4.3   54   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL   D3D11   54  
  • 56. IMAGE-BASED  MODELING   56   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  • 57. GENERATING  THE  MODEL   Render:  projection,   rasterization,   texturing,  depth   buffering,  …   57   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  • 58. TressFX   HAIR   !  AMD  technology  for  high-quality  hair  rendering   !  Thousands  of  hair  strands  individually  simulated  and   rendered  on  the  GPU   !  DirectCompute  physics  simulation   !  Shader  Model  5.0  pixel  shader  using  compute  capabilities  for   rendering   58   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  • 59. NOT  EXPOSED  IN  GRAPHICS  APIS  (YET)   !  Local  shared  memory  restricted  to   ‒  Compute     ‒  Tessellation  Control,  in  a  limited  sense   !  Some  OpenCL  extensions  (e.g.,  64  bit  atomics)   !  Numerical  compliance   !  Some  OpenCL  1.2  additions   !  OpenCL  2.0  additions   59   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  • 60. SUMMARY   The  graphics  pipeline     gives  you  access  to  different  hardware   There  are  additional  synchronization   issues  and  opportunities   Mix  and  match  for  the  best  of  both   compute  and  graphics   60   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL  
  • 61. DISCLAIMER  &  ATTRIBUTION   The  information  presented  in  this  document  is  for  informational  purposes  only  and  may  contain  technical  inaccuracies,  omissions  and  typographical  errors.     The  information  contained  herein  is  subject  to  change  and  may  be  rendered  inaccurate  for  many  reasons,  including  but  not  limited  to  product  and  roadmap   changes,  component  and  motherboard  version  changes,  new  model  and/or  product  releases,  product  differences  between  differing  manufacturers,  software   changes,  BIOS  flashes,  firmware  upgrades,  or  the  like.  AMD  assumes  no  obligation  to  update  or  otherwise  correct  or  revise  this  information.  However,  AMD   reserves  the  right  to  revise  this  information  and  to  make  changes  from  time  to  time  to  the  content  hereof  without  obligation  of  AMD  to  notify  any  person  of   such  revisions  or  changes.     AMD  MAKES  NO  REPRESENTATIONS  OR  WARRANTIES  WITH  RESPECT  TO  THE  CONTENTS  HEREOF  AND  ASSUMES  NO  RESPONSIBILITY  FOR  ANY   INACCURACIES,  ERRORS  OR  OMISSIONS  THAT  MAY  APPEAR  IN  THIS  INFORMATION.     AMD  SPECIFICALLY  DISCLAIMS  ANY  IMPLIED  WARRANTIES  OF  MERCHANTABILITY  OR  FITNESS  FOR  ANY  PARTICULAR  PURPOSE.  IN  NO  EVENT  WILL  AMD  BE   LIABLE  TO  ANY  PERSON  FOR  ANY  DIRECT,  INDIRECT,  SPECIAL  OR  OTHER  CONSEQUENTIAL  DAMAGES  ARISING  FROM  THE  USE  OF  ANY  INFORMATION   CONTAINED  HEREIN,  EVEN  IF  AMD  IS  EXPRESSLY  ADVISED  OF  THE  POSSIBILITY  OF  SUCH  DAMAGES.     ATTRIBUTION   ©  2013  Advanced  Micro  Devices,  Inc.  All  rights  reserved.  AMD,  the  AMD  Arrow  logo  and  combinations  thereof  are  trademarks  of  Advanced  Micro  Devices,   Inc.  in  the  United  States  and/or  other  jurisdictions.    SPEC    is  a  registered  trademark  of  the  Standard  Performance  Evaluation  Corporation  (SPEC).  
Other   names  are  for  informational  purposes  only  and  may  be  trademarks  of  their  respective  owners.   61   |      PRESENTATION  TITLE      |      DECEMBER  4,  2013      |      CONFIDENTIAL