Embrace the dark side. As a developer you'll often be advised that writing concurrent code should be the purview of the genius coders alone. In this talk Michael Barker will discard that notion into the cesspits of logic and reason and attempt to present on the less understood area of non-blocking concurrency, i.e. concurrency without locks. We'll look at the modern Intel CPU architecture, why we need a memory model, the performance costs of various non-blocking constructs and delve into the implementation details of the latest version of the Disruptor to see how non-blocking concurrency can be applied to build high performance data structures.
5. Causality
Causality
Fear will keep the
local systems inline.
instructions
- Grand Moff Wilhuff Tarkin
6. • Loads are not reordered with other loads.
• Stores are not reordered with other stores.
• Stores are not reordered with older loads.
• In a multiprocessor system, memory ordering obeys causality (memory
ordering respects transitive visibility).
• In a multiprocessor system, stores to the same location have a total order.
• In a multiprocessor system, locked instructions to the same
location have a total order.
• Loads and Stores are not reordered with locked instructions.
9. public class AtomicLong extends Number
implements Serializable {
// ...
// Declared volatile, so the plain assignment in set() below is a volatile write.
private volatile long value;
// ...
/**
* Sets to the given value.
*
* @param newValue the new value
*/
public final void set(long newValue) {
// Direct store to the volatile field declared above.
value = newValue;
}
// ...
}
10. # {method} 'set' '(J)V' in 'java/util/concurrent/atomic/AtomicLong'
# this: rsi:rsi = 'java/util/concurrent/atomic/AtomicLong'
# parm0: rdx:rdx = long
# [sp+0x20] (sp of caller)
mov 0x8(%rsi),%r10d
shl $0x3,%r10
cmp %r10,%rax
jne 0x00007f1f410378a0 ; {runtime_call}
xchg %ax,%ax
nopl 0x0(%rax,%rax,1)
xchg %ax,%ax
push %rbp
sub $0x10,%rsp
nop
mov %rdx,0x10(%rsi)
lock addl $0x0,(%rsp) ;*putfield value
; - j.u.c.a.AtomicLong::set@2 (line 112)
add $0x10,%rsp
pop %rbp
test %eax,0xa40fd06(%rip) # 0x00007f1f4b471000
; {poll_return}
11. public class AtomicLong extends Number
implements Serializable {
// setup to use Unsafe.compareAndSwapLong for updates
private static final Unsafe unsafe = Unsafe.getUnsafe();
// Passed to the Unsafe call below to identify the field; its initialisation is not shown in this excerpt.
private static final long valueOffset;
// ...
/**
* Eventually sets to the given value.
*
* @param newValue the new value
* @since 1.6
*/
public final void lazySet(long newValue) {
// Goes through Unsafe.putOrderedLong on this object at valueOffset instead of assigning the field directly.
unsafe.putOrderedLong(this, valueOffset, newValue);
}
// ...
}
12. # {method} 'lazySet' '(J)V' in 'java/util/concurrent/atomic/
AtomicLong'
# this: rsi:rsi = 'java/util/concurrent/atomic/AtomicLong'
# parm0: rdx:rdx = long
# [sp+0x20] (sp of caller)
mov 0x8(%rsi),%r10d
shl $0x3,%r10
cmp %r10,%rax
jne 0x00007f1f410378a0 ; {runtime_call}
xchg %ax,%ax
nopl 0x0(%rax,%rax,1)
xchg %ax,%ax
push %rbp
sub $0x10,%rsp
nop
mov %rdx,0x10(%rsi) ;*invokevirtual putOrderedLong
; - AtomicLong::lazySet@8 (line 122)
add $0x10,%rsp
pop %rbp
test %eax,0xa41204b(%rip) # 0x00007f1f4b471000
; {poll_return}
13. public class AtomicInteger extends Number
implements Serializable {
// setup to use Unsafe.compareAndSwapInt for updates
private static final Unsafe unsafe = Unsafe.getUnsafe();
private static final long valueOffset;
private volatile int value;
//...
/**
* Delegates to Unsafe.compareAndSwapInt on this object at valueOffset.
*
* @param expect the expected value handed to the compare-and-swap
* @param update the new value handed to the compare-and-swap
* @return the boolean returned by Unsafe.compareAndSwapInt
*/
public final boolean compareAndSet(int expect,
int update) {
return unsafe.compareAndSwapInt(this, valueOffset,
expect, update);
}
}
14. # {method} 'compareAndSet' '(JJ)Z' in 'java/util/concurrent/
atomic/AtomicLong'
# this: rsi:rsi = 'java/util/concurrent/atomic/AtomicLong'
# parm0: rdx:rdx = long
# parm1: rcx:rcx = long
# [sp+0x20] (sp of caller)
mov 0x8(%rsi),%r10d
shl $0x3,%r10
cmp %r10,%rax
jne 0x00007f6699037a60 ; {runtime_call}
xchg %ax,%ax
nopl 0x0(%rax,%rax,1)
xchg %ax,%ax
sub $0x18,%rsp
mov %rbp,0x10(%rsp)
mov %rdx,%rax
lock cmpxchg %rcx,0x10(%rsi)
sete %r11b
movzbl %r11b,%r11d ;*invokevirtual compareAndSwapLong
; - j.u.c.a.AtomicLong::compareAndSet@9 (line
149)
mov %r11d,%eax
add $0x10,%rsp
pop %rbp
test %eax,0x91df935(%rip) # 0x00007f66a223e000
; {poll_return}
16. Example - Disruptor Multi-producer
/**
 * Obtains the next sequence from the disruptor, stores {@code value} at that
 * sequence, and then publishes the sequence.
 *
 * @param disruptor the disruptor to write to
 * @param value     the value to store
 */
private void publish(Disruptor disruptor, long value) {
    final long sequence = disruptor.next();
    disruptor.setValue(sequence, value);
    disruptor.publish(sequence);
}
17. Example - Disruptor Multi-producer
/**
 * Advances {@code nextSequence} by one using a compare-and-set retry loop.
 *
 * <p>Each attempt reads {@code nextSequence}, computes its successor, parks
 * for one nanosecond at a time while the successor exceeds
 * {@code readSequence.get() + size}, and then tries to install the successor.
 * The loop repeats until the compare-and-set succeeds.
 *
 * @return the sequence value that was successfully installed
 */
public long next() {
    for (;;) {
        final long observed = nextSequence.get();
        final long claimed = observed + 1;

        // Wait here while the candidate is ahead of readSequence + size.
        while (claimed > (readSequence.get() + size)) {
            LockSupport.parkNanos(1L);
        }

        // Retry from the top if nextSequence changed since it was read.
        if (nextSequence.compareAndSet(observed, claimed)) {
            return claimed;
        }
    }
}
18. Algorithm: Spin - 1
/**
 * Busy-waits until {@code cursor} equals {@code sequence - 1}, then
 * lazy-sets {@code cursor} to {@code sequence}.
 *
 * @param sequence the sequence to write into the cursor
 */
public void publish(long sequence) {
    final long predecessor = sequence - 1;
    for (;;) {
        if (cursor.get() == predecessor) {
            break;
        }
        // Spin: the cursor has not reached the predecessor yet.
    }
    cursor.lazySet(sequence);
}
22. Algorithm: Buffer
/**
 * Advances {@code cursor} by one using a compare-and-set retry loop.
 *
 * <p>Each attempt reads {@code cursor}, computes its successor, parks for one
 * nanosecond at a time while the successor exceeds
 * {@code readSequence.get() + size}, and then tries to install the successor.
 * The loop repeats until the compare-and-set succeeds.
 *
 * @return the sequence value that was successfully installed
 */
public long next() {
    for (;;) {
        final long observed = cursor.get();
        final long claimed = observed + 1;

        // Wait here while the candidate is ahead of readSequence + size.
        while (claimed > (readSequence.get() + size)) {
            LockSupport.parkNanos(1L);
        }

        // Retry from the top if the cursor changed since it was read.
        if (cursor.compareAndSet(observed, claimed)) {
            return claimed;
        }
    }
}
23. Algorithm: Buffer
/**
 * Writes {@code (int) (sequence >>> indexShift)} into {@code published} at
 * the position given by {@code indexOf(sequence)}.
 *
 * @param sequence the sequence being published
 */
public void publish(long sequence) {
    final int marker = (int) (sequence >>> indexShift);
    final int slot = indexOf(sequence);
    published.set(slot, marker);
}
// Get Value: compute the expected marker and the slot for `current`,
// then wait until the entry in `published` at that slot matches the marker.
int availableValue = (int) (current >>> indexShift);
int index = indexOf(current);
while (published.get(index) != availableValue) {
// Spin until published.get(index) equals availableValue
}
- Concurrency is taught all wrong.\n- What is non-blocking concurrency.\n- Mechanical Sympathy, locks/mutexes are a completely artificial construct\n- MTs concurrency course blocking v. non-blocking.\n- Tools for non-blocking concurrency functions of the CPU, need to look at CPU architecture first.\n
- Causality\n- Why CPUs/Compilers reorder\n
- Java Memory Model provides sequential consistency for race-free programs\n- As-if-serial\n- Disallows out of thin air values\n- First mainstream programming language to include a memory model (C/C++: a combination of the CPU and whatever the compiler happens to do).\n
\n
\n
\n
- volatile\n- java.util.concurrent.atomic.*\n - Atomic<Long|Integer|Reference>\n - Atomic<Long|Integer|Reference>Array (why use over an array of atomics)\n - Atomic<Long|Integer|Reference>FieldUpdater (can be a bit slow)\n
- Fight club\n- If you're smart enough\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n
- Thread wake ups\n- Hard spin\n- Spin with yield\n- PAUSE instruction - please add to Java\n- MONITOR and MWAIT\n