SlideShare a Scribd company logo
1 of 44
Download to read offline
Bit Twiddling Hacks
Integers
David Barina
March 28, 2014
David Barina Bit Hacks March 28, 2014 1 / 41
Counting bits set: naive
unsigned x;
unsigned c;
for(c = 0; x; x >>= 1)
{
c += x & 1;
}
a.k.a. population count, popcount
David Barina Bit Hacks March 28, 2014 2 / 41
Counting bits set: Kernighan
for(c = 0; x; c++)
{
x &= x - 1;
}
David Barina Bit Hacks March 28, 2014 3 / 41
Counting bits set: Kernighan
x
x-1
x&(x-1)
x
x-1
x&(x-1)
David Barina Bit Hacks March 28, 2014 4 / 41
Counting bits set: in parallel
x -= x >> 1 & ~0U/3;
x = (x & ~0U/15*3) + (x >> 2 & ~0U/15*3);
x = (x + (x >> 4)) & ~0U/255*15;
c = (x * (~0U/255)) >> (sizeof(unsigned) - 1) * CHAR_BIT;
David Barina Bit Hacks March 28, 2014 5 / 41
Counting bits set
x -= x >> 1 & ~0U/3;
x = (x & ~0U/15*3) + (x >> 2 & ~0U/15*3);
x = (x + (x >> 4)) & ~0U/255*15;
c = (x * (~0U/255)) >> (sizeof(unsigned) - 1) * CHAR_BIT;
David Barina Bit Hacks March 28, 2014 6 / 41
Counting bits set
&~0U/3
x
x>>1
11 - 01 = 10 = 2
10 - 01 = 01 = 1
01 - 00 = 01 = 1
00 - 00 = 00 = 0
-
0..2
David Barina Bit Hacks March 28, 2014 7 / 41
Counting bits set
x -= x >> 1 & ~0U/3;
x = (x & ~0U/15*3) + (x >> 2 & ~0U/15*3);
x = (x + (x >> 4)) & ~0U/255*15;
c = (x * (~0U/255)) >> (sizeof(unsigned) - 1) * CHAR_BIT;
David Barina Bit Hacks March 28, 2014 8 / 41
Counting bits set
&~0U/15*3
x
x
x>>2
&~0U/15*3
+
0..4
David Barina Bit Hacks March 28, 2014 9 / 41
Counting bits set
x -= x >> 1 & ~0U/3;
x = (x & ~0U/15*3) + (x >> 2 & ~0U/15*3);
x = (x + (x >> 4)) & ~0U/255*15;
c = (x * (~0U/255)) >> (sizeof(unsigned) - 1) * CHAR_BIT;
David Barina Bit Hacks March 28, 2014 10 / 41
Counting bits set
x
x>>4
&~0U/255*15
+
0..8
David Barina Bit Hacks March 28, 2014 11 / 41
Counting bits set
x -= x >> 1 & ~0U/3;
x = (x & ~0U/15*3) + (x >> 2 & ~0U/15*3);
x = (x + (x >> 4)) & ~0U/255*15;
c = (x * (~0U/255)) >> (sizeof(unsigned) - 1) * CHAR_BIT;
David Barina Bit Hacks March 28, 2014 12 / 41
Counting bits set
0..8 x
~0U/255 1 1 1 1
*
0..8 0..8 0..8
0..80..80..80..8
0..80..80..80..8
0..80..80..80..8
0..80..80..80..8
0..32
0..32 >>24
David Barina Bit Hacks March 28, 2014 13 / 41
Counting bits set
algorithm time
naive 30.943180 ns
Kernighan 17.374596 ns
parallel 4.136793 ns
speedup 7.5×
David Barina Bit Hacks March 28, 2014 14 / 41
Copy the highest set bit to all of the lower bits
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
a.k.a. copymsb
David Barina Bit Hacks March 28, 2014 15 / 41
Copy the highest set bit to all of the lower bits
x
x>>1
|
x|=x>>1
x>>2
|
x|=x>>2
x>>4
|
x|=x>>4
David Barina Bit Hacks March 28, 2014 16 / 41
Copy the highest set bit to all of the lower bits
unsigned shift = 1;
while(shift < sizeof(int) * CHAR_BIT)
{
x |= x >> shift;
shift <<= 1;
}
David Barina Bit Hacks March 28, 2014 17 / 41
Copy the highest set bit to all of the lower bits
algorithm time
unroll 3.687508 ns
loop 3.689689 ns
speedup 1.0×
David Barina Bit Hacks March 28, 2014 18 / 41
Find the log base 2 of an integer
⌊log2(x)⌋ = −1 : x < 1
⌊log2(x)⌋ = ⌈log2(x + 1)⌉ − 1
⌈log2(x)⌉ = ⌊log2(x − 1)⌋ + 1
David Barina Bit Hacks March 28, 2014 19 / 41
Find the log base 2 of an integer: IEEE double
unsigned x;
unsigned r;
r = (unsigned)ceil(log2((double)x));
r = (unsigned)floor(log2((double)x));
David Barina Bit Hacks March 28, 2014 20 / 41
Find the log base 2 of an integer: naive way (floor)
unsigned r = 0;
while(x >>= 1)
{
r++;
}
David Barina Bit Hacks March 28, 2014 21 / 41
Find the log base 2 of an integer: fast way (floor)
unsigned r;
unsigned shift;
r = (x > 0xFFFF) << 4; x >>= r;
shift = (x > 0xFF ) << 3; x >>= shift; r |= shift;
shift = (x > 0xF ) << 2; x >>= shift; r |= shift;
shift = (x > 0x3 ) << 1; x >>= shift; r |= shift;
r |= (x >> 1);
David Barina Bit Hacks March 28, 2014 22 / 41
Find the log base 2 of an integer: fast way (floor)
x
x>0xFFFF
x>>16, r+=16
x>0xFF
x>>8, r+=8
x>0xF
x>>4, r+=4
x>0x3
x>>2, r+=2
x>>1, r+=1
David Barina Bit Hacks March 28, 2014 23 / 41
Find the log base 2 of an integer: popcount (ceil)
x--;
x = copymsb(x); // next pow2 minus 1
r = popcount(x);
David Barina Bit Hacks March 28, 2014 24 / 41
Find the log base 2 of an integer: popcount (ceil)
x
x = copy MSB
r = popcount
{
r
David Barina Bit Hacks March 28, 2014 25 / 41
Find the log base 2 of an integer: DeBruijn (floor)
static const int MultiplyDeBruijnBitPosition[32] =
{
0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7,
19, 27, 23, 6, 26, 5, 4, 31
};
x = copymsb(x); // next pow2 minus 1
r = MultiplyDeBruijnBitPosition[
(unsigned)(x * 0x07C4ACDDU) >> 27];
David Barina Bit Hacks March 28, 2014 26 / 41
Find the log base 2 of an integer: fast loop (floor)
const unsigned int b[] = {
0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000 };
const unsigned int S[] = { 1, 2, 4, 8, 16 };
register unsigned int r = 0;
for(int i = 4; i >= 0; i--)
{
if(x & b[i])
{
x >>= S[i];
r |= S[i];
}
}
David Barina Bit Hacks March 28, 2014 27 / 41
Find the log base 2 of an integer: table (floor)
static const char LogTable256[256] =
{
#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
-1,
0,
1, 1,
2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3,
LT(4),
LT(5), LT(5),
LT(6), LT(6), LT(6), LT(6),
LT(7), LT(7), LT(7), LT(7),
LT(7), LT(7), LT(7), LT(7)
#undef LT
};
David Barina Bit Hacks March 28, 2014 28 / 41
Find the log base 2 of an integer: table (floor)
if(t = x >> 24)
{
r = 24 + LogTable256[t];
}
else if(t = x >> 16)
{
r = 16 + LogTable256[t];
}
else if(t = x >> 8)
{
r = 8 + LogTable256[t];
}
else
{
r = LogTable256[x];
}
David Barina Bit Hacks March 28, 2014 29 / 41
Find the log base 2 of an integer
algorithm floor ceil
double 64.648230 ns 64.569502 ns
naive 20.249388 ns 20.179684 ns
fast 12.542725 ns 12.199326 ns
popcount 8.794856 ns 8.409071 ns
DeBruijn 4.519947 ns 4.809108 ns
fast loop 3.784315 ns 3.716202 ns
table 1.834092 ns 1.835536 ns
speedup > 35×
David Barina Bit Hacks March 28, 2014 30 / 41
Next power of 2: copymsb
x--;
x = copymsb(x);
x++;
David Barina Bit Hacks March 28, 2014 31 / 41
Next power of 2: table
x = 1 << ceil_log2(x);
David Barina Bit Hacks March 28, 2014 32 / 41
Next power of 2
algorithm time
copymsb 3.976506 ns
log2 table 2.169806 ns
speedup > 1.8×
David Barina Bit Hacks March 28, 2014 33 / 41
Is power of 2
if( 0 == (x & (x-1)) )
{
// ...
}
David Barina Bit Hacks March 28, 2014 34 / 41
Select minimum, maximum: naive
(x < y) ? x : y
(x > y) ? x : y
David Barina Bit Hacks March 28, 2014 35 / 41
Select minimum, maximum: xor
(((x-y) >> (sizeof(int)*CHAR_BIT-1)) & (x^y)) ^ y
(((x-y) >> (sizeof(int)*CHAR_BIT-1)) & (y^x)) ^ x
x = y ^ (x^y)
y = y ^ 0
(-z) >> 31 = 0xffffffff
(+z) >> 31 = 0x00000000
David Barina Bit Hacks March 28, 2014 36 / 41
Select minimum, maximum
algorithm min max
naive 1.509076 ns 1.503108 ns
xor 2.035053 ns 1.847343 ns
speedup 0.7×
David Barina Bit Hacks March 28, 2014 37 / 41
Absolute value (32-bit)
// stdlib.h
printf("%i\n", abs(+1000000000));
printf("%i\n", abs(-1000000000));
printf("%i\n", abs(-2147483648));
David Barina Bit Hacks March 28, 2014 38 / 41
Absolute value (32-bit)
// stdlib.h
printf("%i\n", abs(+1000000000));
printf("%i\n", abs(-1000000000));
printf("%i\n", abs(-2147483648));
1000000000
1000000000
-2147483648
David Barina Bit Hacks March 28, 2014 38 / 41
Negate (32-bit)
// negate all bits and add "1"
printf("%i\n", negate(+1000000000));
printf("%i\n", negate(-1000000000));
printf("%i\n", negate(-2147483648));
David Barina Bit Hacks March 28, 2014 39 / 41
Negate (32-bit)
// negate all bits and add "1"
printf("%i\n", negate(+1000000000));
printf("%i\n", negate(-1000000000));
printf("%i\n", negate(-2147483648));
-1000000000
1000000000
-2147483648
David Barina Bit Hacks March 28, 2014 39 / 41
Overflow (32-bit and 64-bit)
printf("%i\n", (int)+2147483648); // long int => int
printf("%li\n", +2147483648); // long int
David Barina Bit Hacks March 28, 2014 40 / 41
Overflow (32-bit and 64-bit)
printf("%i\n", (int)+2147483648); // long int => int
printf("%li\n", +2147483648); // long int
-2147483648
2147483648
David Barina Bit Hacks March 28, 2014 40 / 41
Sources
x86, Intel Core2 Quad @ 2.00 GHz
gcc 4.8 -O3
https://www.google.com/?q=Bit+Twiddling
http://graphics.stanford.edu/~seander/bithacks.html
http://www.fefe.de/intof.html
David Barina Bit Hacks March 28, 2014 41 / 41

More Related Content

What's hot

Boost Performance With My S Q L 51 Partitions
Boost Performance With  My S Q L 51 PartitionsBoost Performance With  My S Q L 51 Partitions
Boost Performance With My S Q L 51 Partitions
PerconaPerformance
 
Python 2.5 reference card (2009)
Python 2.5 reference card (2009)Python 2.5 reference card (2009)
Python 2.5 reference card (2009)
gekiaruj
 

What's hot (17)

Basic arithmetic, instruction execution and program
Basic arithmetic, instruction execution and programBasic arithmetic, instruction execution and program
Basic arithmetic, instruction execution and program
 
Enter The Matrix
Enter The MatrixEnter The Matrix
Enter The Matrix
 
A few things about the Oracle optimizer - 2013
A few things about the Oracle optimizer - 2013A few things about the Oracle optimizer - 2013
A few things about the Oracle optimizer - 2013
 
Boost Performance With My S Q L 51 Partitions
Boost Performance With  My S Q L 51 PartitionsBoost Performance With  My S Q L 51 Partitions
Boost Performance With My S Q L 51 Partitions
 
Maneuvering target track prediction model
Maneuvering target track prediction modelManeuvering target track prediction model
Maneuvering target track prediction model
 
Python3 cheatsheet
Python3 cheatsheetPython3 cheatsheet
Python3 cheatsheet
 
Java 8 - An Introduction by Jason Swartz
Java 8 - An Introduction by Jason SwartzJava 8 - An Introduction by Jason Swartz
Java 8 - An Introduction by Jason Swartz
 
Fast, stable and scalable true radix sorting with Matt Dowle at useR! Aalborg
Fast, stable and scalable true radix sorting with Matt Dowle at useR! AalborgFast, stable and scalable true radix sorting with Matt Dowle at useR! Aalborg
Fast, stable and scalable true radix sorting with Matt Dowle at useR! Aalborg
 
Numpy tutorial(final) 20160303
Numpy tutorial(final) 20160303Numpy tutorial(final) 20160303
Numpy tutorial(final) 20160303
 
Python 2.5 reference card (2009)
Python 2.5 reference card (2009)Python 2.5 reference card (2009)
Python 2.5 reference card (2009)
 
Heaps 4up
Heaps 4upHeaps 4up
Heaps 4up
 
Chapter 9 ds
Chapter 9 dsChapter 9 ds
Chapter 9 ds
 
Lec8
Lec8Lec8
Lec8
 
chap 2 Ex#1.1
chap 2 Ex#1.1chap 2 Ex#1.1
chap 2 Ex#1.1
 
3 pandasadvanced
3 pandasadvanced3 pandasadvanced
3 pandasadvanced
 
Lec9
Lec9Lec9
Lec9
 
Set Operations - Union Find and Bloom Filters
Set Operations - Union Find and Bloom FiltersSet Operations - Union Find and Bloom Filters
Set Operations - Union Find and Bloom Filters
 

Viewers also liked

Single-Loop Software Architecture for JPEG 2000
Single-Loop Software Architecture for JPEG 2000Single-Loop Software Architecture for JPEG 2000
Single-Loop Software Architecture for JPEG 2000
David Bařina
 
Diplomova prace - obhajoba
Diplomova prace - obhajobaDiplomova prace - obhajoba
Diplomova prace - obhajoba
Michal Klajban
 
obhajoba_prace
obhajoba_praceobhajoba_prace
obhajoba_prace
zlamalp
 

Viewers also liked (18)

GStreamer
GStreamerGStreamer
GStreamer
 
IIR aproximace Gaussovy funkce
IIR aproximace Gaussovy funkceIIR aproximace Gaussovy funkce
IIR aproximace Gaussovy funkce
 
Wavelets @ CPU
Wavelets @ CPUWavelets @ CPU
Wavelets @ CPU
 
Parallel Wavelet Schemes for Images
Parallel Wavelet Schemes for ImagesParallel Wavelet Schemes for Images
Parallel Wavelet Schemes for Images
 
Single-Loop Software Architecture for JPEG 2000
Single-Loop Software Architecture for JPEG 2000Single-Loop Software Architecture for JPEG 2000
Single-Loop Software Architecture for JPEG 2000
 
Akcelerace DWT pomocí SIMD
Akcelerace DWT pomocí SIMDAkcelerace DWT pomocí SIMD
Akcelerace DWT pomocí SIMD
 
FFmpeg
FFmpegFFmpeg
FFmpeg
 
Wavelet Lifting on Application Specific Vector Processor
Wavelet Lifting on Application Specific Vector ProcessorWavelet Lifting on Application Specific Vector Processor
Wavelet Lifting on Application Specific Vector Processor
 
Fixed-point arithmetic
Fixed-point arithmeticFixed-point arithmetic
Fixed-point arithmetic
 
Lifting Scheme Cores for Wavelet Transform
Lifting Scheme Cores for Wavelet TransformLifting Scheme Cores for Wavelet Transform
Lifting Scheme Cores for Wavelet Transform
 
Real-Time 3-D Wavelet Lifting
Real-Time 3-D Wavelet LiftingReal-Time 3-D Wavelet Lifting
Real-Time 3-D Wavelet Lifting
 
Citovanie a parafrázovanie online
Citovanie a parafrázovanie onlineCitovanie a parafrázovanie online
Citovanie a parafrázovanie online
 
Wavelet News
Wavelet NewsWavelet News
Wavelet News
 
Diplomova prace - obhajoba
Diplomova prace - obhajobaDiplomova prace - obhajoba
Diplomova prace - obhajoba
 
obhajoba_prace
obhajoba_praceobhajoba_prace
obhajoba_prace
 
Prezentace Bakalářské práce - Obhajoba 2012
Prezentace Bakalářské práce - Obhajoba 2012Prezentace Bakalářské práce - Obhajoba 2012
Prezentace Bakalářské práce - Obhajoba 2012
 
bakalarska a diplomova praca - zdroje
bakalarska a diplomova praca - zdrojebakalarska a diplomova praca - zdroje
bakalarska a diplomova praca - zdroje
 
Šablona prezentace pro obhajobu diplomové práce
Šablona prezentace pro obhajobu diplomové práceŠablona prezentace pro obhajobu diplomové práce
Šablona prezentace pro obhajobu diplomové práce
 

Similar to Bit Twiddling Hacks: Integers (7)

chapter3 (2).pdf
chapter3 (2).pdfchapter3 (2).pdf
chapter3 (2).pdf
 
Solr sparse faceting
Solr sparse facetingSolr sparse faceting
Solr sparse faceting
 
Introduction to R programming
Introduction to R programmingIntroduction to R programming
Introduction to R programming
 
Python.pdf
Python.pdfPython.pdf
Python.pdf
 
Introduction to NumPy for Machine Learning Programmers
Introduction to NumPy for Machine Learning ProgrammersIntroduction to NumPy for Machine Learning Programmers
Introduction to NumPy for Machine Learning Programmers
 
Applied numerical methods lec2
Applied numerical methods lec2Applied numerical methods lec2
Applied numerical methods lec2
 
Taint-based Dynamic Analysis (CoC Research Day 2009)
Taint-based Dynamic Analysis (CoC Research Day 2009)Taint-based Dynamic Analysis (CoC Research Day 2009)
Taint-based Dynamic Analysis (CoC Research Day 2009)
 

More from David Bařina (8)

CCSDS 122.0
CCSDS 122.0CCSDS 122.0
CCSDS 122.0
 
Lossy Light Field Compression
Lossy Light Field CompressionLossy Light Field Compression
Lossy Light Field Compression
 
Mathematical curiosities
Mathematical curiositiesMathematical curiosities
Mathematical curiosities
 
C/C++ tricks
C/C++ tricksC/C++ tricks
C/C++ tricks
 
New Transforms for JPEG Format
New Transforms for JPEG FormatNew Transforms for JPEG Format
New Transforms for JPEG Format
 
JPEG
JPEGJPEG
JPEG
 
Discrete Wavelet Transforms on Parallel Architectures
Discrete Wavelet Transforms on Parallel ArchitecturesDiscrete Wavelet Transforms on Parallel Architectures
Discrete Wavelet Transforms on Parallel Architectures
 
Parallel Implementation of the 2-D Discrete Wavelet Transform
Parallel Implementation of the 2-D Discrete Wavelet TransformParallel Implementation of the 2-D Discrete Wavelet Transform
Parallel Implementation of the 2-D Discrete Wavelet Transform
 

Recently uploaded

Discovery of an Accretion Streamer and a Slow Wide-angle Outflow around FUOri...
Discovery of an Accretion Streamer and a Slow Wide-angle Outflow around FUOri...Discovery of an Accretion Streamer and a Slow Wide-angle Outflow around FUOri...
Discovery of an Accretion Streamer and a Slow Wide-angle Outflow around FUOri...
Sérgio Sacani
 
Formation of low mass protostars and their circumstellar disks
Formation of low mass protostars and their circumstellar disksFormation of low mass protostars and their circumstellar disks
Formation of low mass protostars and their circumstellar disks
Sérgio Sacani
 
Pests of cotton_Borer_Pests_Binomics_Dr.UPR.pdf
Pests of cotton_Borer_Pests_Binomics_Dr.UPR.pdfPests of cotton_Borer_Pests_Binomics_Dr.UPR.pdf
Pests of cotton_Borer_Pests_Binomics_Dr.UPR.pdf
PirithiRaju
 
Asymmetry in the atmosphere of the ultra-hot Jupiter WASP-76 b
Asymmetry in the atmosphere of the ultra-hot Jupiter WASP-76 bAsymmetry in the atmosphere of the ultra-hot Jupiter WASP-76 b
Asymmetry in the atmosphere of the ultra-hot Jupiter WASP-76 b
Sérgio Sacani
 
DIFFERENCE IN BACK CROSS AND TEST CROSS
DIFFERENCE IN  BACK CROSS AND TEST CROSSDIFFERENCE IN  BACK CROSS AND TEST CROSS
DIFFERENCE IN BACK CROSS AND TEST CROSS
LeenakshiTyagi
 

Recently uploaded (20)

GBSN - Microbiology (Unit 2)
GBSN - Microbiology (Unit 2)GBSN - Microbiology (Unit 2)
GBSN - Microbiology (Unit 2)
 
Physiochemical properties of nanomaterials and its nanotoxicity.pptx
Physiochemical properties of nanomaterials and its nanotoxicity.pptxPhysiochemical properties of nanomaterials and its nanotoxicity.pptx
Physiochemical properties of nanomaterials and its nanotoxicity.pptx
 
❤Jammu Kashmir Call Girls 8617697112 Personal Whatsapp Number 💦✅.
❤Jammu Kashmir Call Girls 8617697112 Personal Whatsapp Number 💦✅.❤Jammu Kashmir Call Girls 8617697112 Personal Whatsapp Number 💦✅.
❤Jammu Kashmir Call Girls 8617697112 Personal Whatsapp Number 💦✅.
 
Botany 4th semester series (krishna).pdf
Botany 4th semester series (krishna).pdfBotany 4th semester series (krishna).pdf
Botany 4th semester series (krishna).pdf
 
Recombinant DNA technology (Immunological screening)
Recombinant DNA technology (Immunological screening)Recombinant DNA technology (Immunological screening)
Recombinant DNA technology (Immunological screening)
 
Discovery of an Accretion Streamer and a Slow Wide-angle Outflow around FUOri...
Discovery of an Accretion Streamer and a Slow Wide-angle Outflow around FUOri...Discovery of an Accretion Streamer and a Slow Wide-angle Outflow around FUOri...
Discovery of an Accretion Streamer and a Slow Wide-angle Outflow around FUOri...
 
Lucknow 💋 Russian Call Girls Lucknow Finest Escorts Service 8923113531 Availa...
Lucknow 💋 Russian Call Girls Lucknow Finest Escorts Service 8923113531 Availa...Lucknow 💋 Russian Call Girls Lucknow Finest Escorts Service 8923113531 Availa...
Lucknow 💋 Russian Call Girls Lucknow Finest Escorts Service 8923113531 Availa...
 
Green chemistry and Sustainable development.pptx
Green chemistry  and Sustainable development.pptxGreen chemistry  and Sustainable development.pptx
Green chemistry and Sustainable development.pptx
 
Formation of low mass protostars and their circumstellar disks
Formation of low mass protostars and their circumstellar disksFormation of low mass protostars and their circumstellar disks
Formation of low mass protostars and their circumstellar disks
 
Pests of cotton_Borer_Pests_Binomics_Dr.UPR.pdf
Pests of cotton_Borer_Pests_Binomics_Dr.UPR.pdfPests of cotton_Borer_Pests_Binomics_Dr.UPR.pdf
Pests of cotton_Borer_Pests_Binomics_Dr.UPR.pdf
 
9654467111 Call Girls In Raj Nagar Delhi Short 1500 Night 6000
9654467111 Call Girls In Raj Nagar Delhi Short 1500 Night 60009654467111 Call Girls In Raj Nagar Delhi Short 1500 Night 6000
9654467111 Call Girls In Raj Nagar Delhi Short 1500 Night 6000
 
VIRUSES structure and classification ppt by Dr.Prince C P
VIRUSES structure and classification ppt by Dr.Prince C PVIRUSES structure and classification ppt by Dr.Prince C P
VIRUSES structure and classification ppt by Dr.Prince C P
 
fundamental of entomology all in one topics of entomology
fundamental of entomology all in one topics of entomologyfundamental of entomology all in one topics of entomology
fundamental of entomology all in one topics of entomology
 
CELL -Structural and Functional unit of life.pdf
CELL -Structural and Functional unit of life.pdfCELL -Structural and Functional unit of life.pdf
CELL -Structural and Functional unit of life.pdf
 
Hire 💕 9907093804 Hooghly Call Girls Service Call Girls Agency
Hire 💕 9907093804 Hooghly Call Girls Service Call Girls AgencyHire 💕 9907093804 Hooghly Call Girls Service Call Girls Agency
Hire 💕 9907093804 Hooghly Call Girls Service Call Girls Agency
 
Botany krishna series 2nd semester Only Mcq type questions
Botany krishna series 2nd semester Only Mcq type questionsBotany krishna series 2nd semester Only Mcq type questions
Botany krishna series 2nd semester Only Mcq type questions
 
Asymmetry in the atmosphere of the ultra-hot Jupiter WASP-76 b
Asymmetry in the atmosphere of the ultra-hot Jupiter WASP-76 bAsymmetry in the atmosphere of the ultra-hot Jupiter WASP-76 b
Asymmetry in the atmosphere of the ultra-hot Jupiter WASP-76 b
 
DIFFERENCE IN BACK CROSS AND TEST CROSS
DIFFERENCE IN  BACK CROSS AND TEST CROSSDIFFERENCE IN  BACK CROSS AND TEST CROSS
DIFFERENCE IN BACK CROSS AND TEST CROSS
 
Chromatin Structure | EUCHROMATIN | HETEROCHROMATIN
Chromatin Structure | EUCHROMATIN | HETEROCHROMATINChromatin Structure | EUCHROMATIN | HETEROCHROMATIN
Chromatin Structure | EUCHROMATIN | HETEROCHROMATIN
 
TEST BANK For Radiologic Science for Technologists, 12th Edition by Stewart C...
TEST BANK For Radiologic Science for Technologists, 12th Edition by Stewart C...TEST BANK For Radiologic Science for Technologists, 12th Edition by Stewart C...
TEST BANK For Radiologic Science for Technologists, 12th Edition by Stewart C...
 

Bit Twiddling Hacks: Integers

  • 1. Bit Twiddling Hacks Integers David Barina March 28, 2014 David Barina Bit Hacks March 28, 2014 1 / 41
  • 2. Counting bits set: naive unsigned x; unsigned c; for(c = 0; x; x >>= 1) { c += x & 1; } a.k.a. population count, popcount David Barina Bit Hacks March 28, 2014 2 / 41
  • 3. Counting bits set: Kernighan for(c = 0; x; c++) { x &= x - 1; } David Barina Bit Hacks March 28, 2014 3 / 41
  • 4. Counting bits set: Kernighan x x-1 x&(x-1) x x-1 x&(x-1) David Barina Bit Hacks March 28, 2014 4 / 41
  • 5. Counting bits set: in parallel x -= x >> 1 & ˜0U/3; x = (x & ˜0U/15*3) + (x >> 2 & ˜0U/15*3); x = (x + (x >> 4)) & ˜0U/255*15; c = (x * (˜0U/255)) >> (sizeof(unsigned) - 1) * CHAR_BIT; David Barina Bit Hacks March 28, 2014 5 / 41
  • 6. Counting bits set x -= x >> 1 & ˜0U/3; x = (x & ˜0U/15*3) + (x >> 2 & ˜0U/15*3); x = (x + (x >> 4)) & ˜0U/255*15; c = (x * (˜0U/255)) >> (sizeof(unsigned) - 1) * CHAR_BIT; David Barina Bit Hacks March 28, 2014 6 / 41
  • 7. Counting bits set &~0U/3 x x>>1 11 - 01 = 10 = 2 10 - 01 = 01 = 1 01 - 00 = 01 = 1 00 - 00 = 00 = 0 - 0..2 David Barina Bit Hacks March 28, 2014 7 / 41
  • 8. Counting bits set x -= x >> 1 & ˜0U/3; x = (x & ˜0U/15*3) + (x >> 2 & ˜0U/15*3); x = (x + (x >> 4)) & ˜0U/255*15; c = (x * (˜0U/255)) >> (sizeof(unsigned) - 1) * CHAR_BIT; David Barina Bit Hacks March 28, 2014 8 / 41
  • 9. Counting bits set &~0U/15*3 x x x>>2 &~0U/15*3 + 0..4 David Barina Bit Hacks March 28, 2014 9 / 41
  • 10. Counting bits set x -= x >> 1 & ˜0U/3; x = (x & ˜0U/15*3) + (x >> 2 & ˜0U/15*3); x = (x + (x >> 4)) & ˜0U/255*15; c = (x * (˜0U/255)) >> (sizeof(unsigned) - 1) * CHAR_BIT; David Barina Bit Hacks March 28, 2014 10 / 41
  • 11. Counting bits set x x>>4 &~0U/255*15 + 0..8 David Barina Bit Hacks March 28, 2014 11 / 41
  • 12. Counting bits set x -= x >> 1 & ˜0U/3; x = (x & ˜0U/15*3) + (x >> 2 & ˜0U/15*3); x = (x + (x >> 4)) & ˜0U/255*15; c = (x * (˜0U/255)) >> (sizeof(unsigned) - 1) * CHAR_BIT; David Barina Bit Hacks March 28, 2014 12 / 41
  • 13. Counting bits set 0..8 x ~0U/2551 1 1 1 * 0..8 0..8 0..8 0..80..80..80..8 0..80..80..80..8 0..80..80..80..8 0..80..80..80..8 0..32 0..32 >>24 David Barina Bit Hacks March 28, 2014 13 / 41
  • 14. Counting bits set algorithm time naive 30.943180 ns Kernighan 17.374596 ns parallel 4.136793 ns speedup 7.5× David Barina Bit Hacks March 28, 2014 14 / 41
  • 15. Copy the highest set bit to all of the lower bits x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; a.k.a. copymsb David Barina Bit Hacks March 28, 2014 15 / 41
  • 16. Copy the highest set bit to all of the lower bits x x>>1 | x|=x>>1 x>>2 | x|=x>>2 x>>4 | x|=x>>4 David Barina Bit Hacks March 28, 2014 16 / 41
  • 17. Copy the highest set bit to all of the lower bits unsigned shift = 1; while(shift < sizeof(int) * CHAR_BIT) { x |= x >> shift; shift <<= 1; } David Barina Bit Hacks March 28, 2014 17 / 41
  • 18. Copy the highest set bit to all of the lower bits algorithm time unroll 3.687508 ns loop 3.689689 ns speedup 1.0× David Barina Bit Hacks March 28, 2014 18 / 41
  • 19. Find the log base 2 of an integer ⌊log2(x)⌋ = −1 : x < 1 ⌊log2(x)⌋ = ⌈log2(x + 1)⌉ − 1 ⌈log2(x)⌉ = ⌊log2(x − 1)⌋ + 1 David Barina Bit Hacks March 28, 2014 19 / 41
  • 20. Find the log base 2 of an integer: IEEE double unsigned x; unsigned r; r = (unsigned)ceil(log2((double)x)); r = (unsigned)floor(log2((double)x)); David Barina Bit Hacks March 28, 2014 20 / 41
  • 21. Find the log base 2 of an integer: naive way (floor) unsigned r = 0; while(x >>= 1) { r++; } David Barina Bit Hacks March 28, 2014 21 / 41
  • 22. Find the log base 2 of an integer: fast way (floor) unsigned r; unsigned shift; r = (x > 0xFFFF) << 4; x >>= r; shift = (x > 0xFF ) << 3; x >>= shift; r |= shift; shift = (x > 0xF ) << 2; x >>= shift; r |= shift; shift = (x > 0x3 ) << 1; x >>= shift; r |= shift; r |= (x >> 1); David Barina Bit Hacks March 28, 2014 22 / 41
  • 23. Find the log base 2 of an integer: fast way (floor) x x>0xFFFF x>>16, r+=16 x>0xFF x>>8, r+=8 x>0xF x>>4, r+=4 x>0x3 x>>2, r+=2 x>>1, r+=1 David Barina Bit Hacks March 28, 2014 23 / 41
  • 24. Find the log base 2 of an integer: popcount (ceil) x--; x = copymsb(x); // next pow2 minus 1 r = popcount(x); David Barina Bit Hacks March 28, 2014 24 / 41
  • 25. Find the log base 2 of an integer: popcount (ceil) x x = copy MSB r = popcount { r David Barina Bit Hacks March 28, 2014 25 / 41
  • 26. Find the log base 2 of an integer: DeBruijn (floor) static const int MultiplyDeBruijnBitPosition[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; x = copymsb(x); // next pow2 minus 1 r = MultiplyDeBruijnBitPosition[ (unsigned)(x * 0x07C4ACDDU) >> 27]; David Barina Bit Hacks March 28, 2014 26 / 41
  • 27. Find the log base 2 of an integer: fast loop (floor) const unsigned int b[] = { 0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000 }; const unsigned int S[] = { 1, 2, 4, 8, 16 }; register unsigned int r = 0; for(int i = 4; i >= 0; i--) { if(x & b[i]) { x >>= S[i]; r |= S[i]; } } David Barina Bit Hacks March 28, 2014 27 / 41
  • 28. Find the log base 2 of an integer: table (floor) static const char LogTable256[256] = { #define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7) #undef LT }; David Barina Bit Hacks March 28, 2014 28 / 41
  • 29. Find the log base 2 of an integer: table (floor) if(t = x >> 24) { r = 24 + LogTable256[t]; } else if(t = x >> 16) { r = 16 + LogTable256[t]; } else if(t = x >> 8) { r = 8 + LogTable256[t]; } else { r = LogTable256[x]; } David Barina Bit Hacks March 28, 2014 29 / 41
  • 30. Find the log base 2 of an integer algorithm floor ceil double 64.648230 ns 64.569502 ns naive 20.249388 ns 20.179684 ns fast 12.542725 ns 12.199326 ns popcount 8.794856 ns 8.409071 ns DeBruijn 4.519947 ns 4.809108 ns fast loop 3.784315 ns 3.716202 ns table 1.834092 ns 1.835536 ns speedup > 35× David Barina Bit Hacks March 28, 2014 30 / 41
  • 31. Next power of 2: copymsb x--; x = copymsb(x); x++; David Barina Bit Hacks March 28, 2014 31 / 41
  • 32. Next power of 2: table x = 1 << ceil_log2(x); David Barina Bit Hacks March 28, 2014 32 / 41
  • 33. Next power of 2 algorithm time copymsb 3.976506 ns log2 table 2.169806 ns speedup > 1.8× David Barina Bit Hacks March 28, 2014 33 / 41
  • 34. Is power of 2 if( 0 == (x & (x-1)) ) { // ... } David Barina Bit Hacks March 28, 2014 34 / 41
  • 35. Select minimum, maximum: naive (x < y) ? x : y (x > y) ? x : y David Barina Bit Hacks March 28, 2014 35 / 41
  • 36. Select minimum, maximum: xor (((x-y) >> (sizeof(int)*CHAR_BIT-1)) & (x^y)) ^ y (((x-y) >> (sizeof(int)*CHAR_BIT-1)) & (y^x)) ^ x x = y ^ (x^y) y = y ^ 0 (-z) >> 31 = 0xffffffff (+z) >> 31 = 0x00000000 David Barina Bit Hacks March 28, 2014 36 / 41
  • 37. Select minimum, maximum algorithm min max naive 1.509076 ns 1.503108 ns xor 2.035053 ns 1.847343 ns speedup 0.7× David Barina Bit Hacks March 28, 2014 37 / 41
  • 38. Absolute value (32-bit) // stdlib.h printf("%in", abs(+1000000000)); printf("%in", abs(-1000000000)); printf("%in", abs(-2147483648)); David Barina Bit Hacks March 28, 2014 38 / 41
  • 39. Absolute value (32-bit) // stdlib.h printf("%in", abs(+1000000000)); printf("%in", abs(-1000000000)); printf("%in", abs(-2147483648)); 1000000000 1000000000 -2147483648 David Barina Bit Hacks March 28, 2014 38 / 41
  • 40. Negate (32-bit) // negate all bits and add "1" printf("%in", negate(+1000000000)); printf("%in", negate(-1000000000)); printf("%in", negate(-2147483648)); David Barina Bit Hacks March 28, 2014 39 / 41
  • 41. Negate (32-bit) // negate all bits and add "1" printf("%in", negate(+1000000000)); printf("%in", negate(-1000000000)); printf("%in", negate(-2147483648)); -1000000000 1000000000 -2147483648 David Barina Bit Hacks March 28, 2014 39 / 41
  • 42. Overflow (32-bit and 64-bit) printf("%in", (int)+2147483648); // long int => int printf("%lin", +2147483648); // long int David Barina Bit Hacks March 28, 2014 40 / 41
  • 43. Overflow (32-bit and 64-bit) printf("%in", (int)+2147483648); // long int => int printf("%lin", +2147483648); // long int -2147483648 2147483648 David Barina Bit Hacks March 28, 2014 40 / 41
  • 44. Sources x86, Intel Core2 Quad @ 2.00 GHz gcc 4.8 -O3 https://www.google.com/?q=Bit+Twiddling http://graphics.stanford.edu/~seander/bithacks.html http://www.fefe.de/intof.html David Barina Bit Hacks March 28, 2014 41 / 41