SlideShare a Scribd company logo
1 of 11
Download to read offline
Floating point basics
The core idea of floating-point representations (as opposed to fixed point representations as used
by, say, ints), is that a number x is written as m*be where m is a mantissa or fractional part, b is a
base, and e is an exponent. On modern computers the base is almost always 2, and for most
floating-point representations the mantissa will be scaled to be between 1 and b. This is done by
adjusting the exponent, e.g.
1 = 1*20
2 = 1*21
0.375 = 1.5*2-2
etc.
Iam writing a program that does some floating addition that uses bit patterns with shifts applied
to the mantissa and such to obtain the sum of the two floating point numbers. Logically and on
paper I can get this to compute the correct sum.
Code:
#include
#include
#include
#include
int isNegative (float f)
{
unsigned int* iptr = (unsigned int*)&f;
return ( ((*iptr) & 0x80000000) ? 1:0);
}
unsigned char getExponent (float f)
{
unsigned int* iptr = (unsigned int*)&f;
return (((*iptr >> 23) & 0xff) - 127);
}
unsigned int getMantissa (float f)
{
unsigned int* iptr = (unsigned int*)&f;
if( *iptr == 0 ) return 0;
return ((*iptr & 0xFFFFFF) | 0x800000 );
}
float sum (float left, float right)
{
unsigned int littleMan;
unsigned int bigMan;
unsigned char littleE;
unsigned char bigE;
unsigned char lexp = getExponent(left);
unsigned char rexp = getExponent(right);
int Dexponent;
if (lexp > rexp)
{
bigE = lexp;
bigMan = getMantissa(left);
littleE = rexp;
littleMan = getMantissa(right);
}
else
{
bigE = rexp;
bigMan = getMantissa(right);
littleE = lexp;
littleMan = getMantissa(left);
}
printf("little: %x %x ", littleE, littleMan);
printf("big: %x %x ", bigE, bigMan);
void shift( unsigned int *valToShift, int bitsToShift )
{
// Masks is used to mask out bits to check for a "sticky" bit.
static unsigned masks[24] =
{
0, 1, 3, 7, 0xf, 0x1f, 0x3f, 0x7f,
0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff,
0xffff, 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff, 0x1fffff, 0x3fffff, 0x7fffff
};
// HOmasks - masks out the H.O. bit of the value masked by the masks entry.
static unsigned HOmasks[24] =
{
0,
1, 2, 4, 0x8, 0x10, 0x20, 0x40, 0x80,
0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000,
0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000
};
// shiftedOut- Holds the value that will be shifted out of a mantissa
// during the denormalization operation (used to round a denormalized value).
int shiftedOut;
assert( bitsToShift <= 23 );
// Grabs the bits we're going to shift out (so we can determine
// how to round this value after the shift).
shiftedOut = *valToShift & masks[ bitsToShift ];
// Shift the value to the right the specified number of bits:
*valToShift = *valToShift >> bitsToShift;
// If necessary, round the value:
if( shiftedOut > HOmasks[ bitsToShift ] )
{
// If the bits we shifted out are greater than 1/2 the L.O. bit, then
// round the value up by one.
*valToShift = *valToShift + 1;
}
else if( shiftedOut == HOmasks[ bitsToShift ] )
{
// If the bits we shifted out are exactly 1/2 of the L.O. bit's value,
// then round the value to the nearest number whose L.O. bit is zero.
*valToShift = *valToShift + ((*valToShift & 1) == 1);
}
// else we round the value down to the previous value. The current
// value is already truncated (rounded down), so we don't have to do anything.
}
// I got two actual floating point values. I want to add them together.
// 1. "denormalize" one of the operands if their exponents aren't
// the same (when adding or subtracting values, the exponents must be the same).
//
// Algorithm: choose the value with the smaller exponent. Shift its mantissa
// to the right the number of bits specified by the difference between the two
// exponents.
if( rexp > lexp )
{
shift( &littleMan, (rexp - lexp));
Dexponent = rexp;
}
else if( rexp < lexp )
{
shift( &littleMan, (lexp - rexp));
Dexponent = lexp;
}
unsigned int result = Dexponent;
float fresult = *(float*)&result;
return(fresult);
}
int main()
{
const int SIZE = 256;
char line[SIZE];
while (1)
{
float f1;
float f2;
float left = f1;
float right = f2;
printf("Please enter the first float ( "q" to quit):");
fgets(line,SIZE,stdin);
if (toupper(line[0]) =='Q')
break;
f1 = atof(line);
printf("Please enter the second float ( "q" to quit):");
fgets(line,SIZE,stdin);
if (toupper(line[0]) == 'Q')
break;
f2 = atof(line);
if (isNegative(f1) || isNegative(f2))
printf ("One of thse is negative, but %g + %g == %g ", f1,f2,sum(f1,f2));
else
printf("%g + %g == %g ", f1,f2,sum(f1,f2));
}
return(EXIT_SUCCESS);
1 = 1*20
2 = 1*21
0.375 = 1.5*2-2
Solution
Floating point basics
The core idea of floating-point representations (as opposed to fixed point representations as used
by, say, ints), is that a number x is written as m*be where m is a mantissa or fractional part, b is a
base, and e is an exponent. On modern computers the base is almost always 2, and for most
floating-point representations the mantissa will be scaled to be between 1 and b. This is done by
adjusting the exponent, e.g.
1 = 1*20
2 = 1*21
0.375 = 1.5*2-2
etc.
Iam writing a program that does some floating addition that uses bit patterns with shifts applied
to the mantissa and such to obtain the sum of the two floating point numbers. Logically and on
paper I can get this to compute the correct sum.
Code:
#include
#include
#include
#include
int isNegative (float f)
{
unsigned int* iptr = (unsigned int*)&f;
return ( ((*iptr) & 0x80000000) ? 1:0);
}
unsigned char getExponent (float f)
{
unsigned int* iptr = (unsigned int*)&f;
return (((*iptr >> 23) & 0xff) - 127);
}
unsigned int getMantissa (float f)
{
unsigned int* iptr = (unsigned int*)&f;
if( *iptr == 0 ) return 0;
return ((*iptr & 0xFFFFFF) | 0x800000 );
}
float sum (float left, float right)
{
unsigned int littleMan;
unsigned int bigMan;
unsigned char littleE;
unsigned char bigE;
unsigned char lexp = getExponent(left);
unsigned char rexp = getExponent(right);
int Dexponent;
if (lexp > rexp)
{
bigE = lexp;
bigMan = getMantissa(left);
littleE = rexp;
littleMan = getMantissa(right);
}
else
{
bigE = rexp;
bigMan = getMantissa(right);
littleE = lexp;
littleMan = getMantissa(left);
}
printf("little: %x %x ", littleE, littleMan);
printf("big: %x %x ", bigE, bigMan);
void shift( unsigned int *valToShift, int bitsToShift )
{
// Masks is used to mask out bits to check for a "sticky" bit.
static unsigned masks[24] =
{
0, 1, 3, 7, 0xf, 0x1f, 0x3f, 0x7f,
0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff,
0xffff, 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff, 0x1fffff, 0x3fffff, 0x7fffff
};
// HOmasks - masks out the H.O. bit of the value masked by the masks entry.
static unsigned HOmasks[24] =
{
0,
1, 2, 4, 0x8, 0x10, 0x20, 0x40, 0x80,
0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000,
0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000
};
// shiftedOut- Holds the value that will be shifted out of a mantissa
// during the denormalization operation (used to round a denormalized value).
int shiftedOut;
assert( bitsToShift <= 23 );
// Grabs the bits we're going to shift out (so we can determine
// how to round this value after the shift).
shiftedOut = *valToShift & masks[ bitsToShift ];
// Shift the value to the right the specified number of bits:
*valToShift = *valToShift >> bitsToShift;
// If necessary, round the value:
if( shiftedOut > HOmasks[ bitsToShift ] )
{
// If the bits we shifted out are greater than 1/2 the L.O. bit, then
// round the value up by one.
*valToShift = *valToShift + 1;
}
else if( shiftedOut == HOmasks[ bitsToShift ] )
{
// If the bits we shifted out are exactly 1/2 of the L.O. bit's value,
// then round the value to the nearest number whose L.O. bit is zero.
*valToShift = *valToShift + ((*valToShift & 1) == 1);
}
// else we round the value down to the previous value. The current
// value is already truncated (rounded down), so we don't have to do anything.
}
// I got two actual floating point values. I want to add them together.
// 1. "denormalize" one of the operands if their exponents aren't
// the same (when adding or subtracting values, the exponents must be the same).
//
// Algorithm: choose the value with the smaller exponent. Shift its mantissa
// to the right the number of bits specified by the difference between the two
// exponents.
if( rexp > lexp )
{
shift( &littleMan, (rexp - lexp));
Dexponent = rexp;
}
else if( rexp < lexp )
{
shift( &littleMan, (lexp - rexp));
Dexponent = lexp;
}
unsigned int result = Dexponent;
float fresult = *(float*)&result;
return(fresult);
}
int main()
{
const int SIZE = 256;
char line[SIZE];
while (1)
{
float f1;
float f2;
float left = f1;
float right = f2;
printf("Please enter the first float ( "q" to quit):");
fgets(line,SIZE,stdin);
if (toupper(line[0]) =='Q')
break;
f1 = atof(line);
printf("Please enter the second float ( "q" to quit):");
fgets(line,SIZE,stdin);
if (toupper(line[0]) == 'Q')
break;
f2 = atof(line);
if (isNegative(f1) || isNegative(f2))
printf ("One of thse is negative, but %g + %g == %g ", f1,f2,sum(f1,f2));
else
printf("%g + %g == %g ", f1,f2,sum(f1,f2));
}
return(EXIT_SUCCESS);
1 = 1*20
2 = 1*21
0.375 = 1.5*2-2

More Related Content

Similar to Floating point basicsThe core idea of floating-point representatio.pdf

Lecture 3 and 4.pptx
Lecture 3 and 4.pptxLecture 3 and 4.pptx
Lecture 3 and 4.pptxMAHAMASADIK
 
Let Us Learn Lambda Using C# 3.0
Let Us Learn Lambda Using C# 3.0Let Us Learn Lambda Using C# 3.0
Let Us Learn Lambda Using C# 3.0Sheik Uduman Ali
 
Use the same variable names and write the function F - Force(x-ks-kc-l.pdf
Use the same variable names and write the function F - Force(x-ks-kc-l.pdfUse the same variable names and write the function F - Force(x-ks-kc-l.pdf
Use the same variable names and write the function F - Force(x-ks-kc-l.pdfacteleshoppe
 
An Introduction to Part of C++ STL
An Introduction to Part of C++ STLAn Introduction to Part of C++ STL
An Introduction to Part of C++ STL乐群 陈
 
introduction to c programming and C History.pptx
introduction to c programming and C History.pptxintroduction to c programming and C History.pptx
introduction to c programming and C History.pptxManojKhadilkar1
 
Rasterisation of a circle by the bresenham algorithm
Rasterisation of a circle by the bresenham algorithmRasterisation of a circle by the bresenham algorithm
Rasterisation of a circle by the bresenham algorithmKALAIRANJANI21
 
Rasterisation of a circle by the bresenham algorithm
Rasterisation of a circle by the bresenham algorithmRasterisation of a circle by the bresenham algorithm
Rasterisation of a circle by the bresenham algorithmKALAIRANJANI21
 
Maxima Finding problem In the 2-dimension space, we shall say that .pdf
Maxima Finding problem In the 2-dimension space, we shall say that .pdfMaxima Finding problem In the 2-dimension space, we shall say that .pdf
Maxima Finding problem In the 2-dimension space, we shall say that .pdfarrowit1
 
[FT-11][suhorng] “Poor Man's” Undergraduate Compilers
[FT-11][suhorng] “Poor Man's” Undergraduate Compilers[FT-11][suhorng] “Poor Man's” Undergraduate Compilers
[FT-11][suhorng] “Poor Man's” Undergraduate CompilersFunctional Thursday
 
Building fast interpreters in Rust
Building fast interpreters in RustBuilding fast interpreters in Rust
Building fast interpreters in RustIngvar Stepanyan
 
Go Programming Language (Golang)
Go Programming Language (Golang)Go Programming Language (Golang)
Go Programming Language (Golang)Ishin Vin
 
Stacks,queues,linked-list
Stacks,queues,linked-listStacks,queues,linked-list
Stacks,queues,linked-listpinakspatel
 
Intro to Matlab programming
Intro to Matlab programmingIntro to Matlab programming
Intro to Matlab programmingAhmed Moawad
 
The concept of stack is extremely important in computer science and .pdf
The concept of stack is extremely important in computer science and .pdfThe concept of stack is extremely important in computer science and .pdf
The concept of stack is extremely important in computer science and .pdfarihantsherwani
 
Intel JIT Talk
Intel JIT TalkIntel JIT Talk
Intel JIT Talkiamdvander
 
The Functional Programming Triad of Folding, Scanning and Iteration - a first...
The Functional Programming Triad of Folding, Scanning and Iteration - a first...The Functional Programming Triad of Folding, Scanning and Iteration - a first...
The Functional Programming Triad of Folding, Scanning and Iteration - a first...Philip Schwarz
 
Write a Matlab code (a computerized program) for calculating plane st.docx
 Write a Matlab code (a computerized program) for calculating plane st.docx Write a Matlab code (a computerized program) for calculating plane st.docx
Write a Matlab code (a computerized program) for calculating plane st.docxajoy21
 

Similar to Floating point basicsThe core idea of floating-point representatio.pdf (20)

Lecture 3 and 4.pptx
Lecture 3 and 4.pptxLecture 3 and 4.pptx
Lecture 3 and 4.pptx
 
Let Us Learn Lambda Using C# 3.0
Let Us Learn Lambda Using C# 3.0Let Us Learn Lambda Using C# 3.0
Let Us Learn Lambda Using C# 3.0
 
Use the same variable names and write the function F - Force(x-ks-kc-l.pdf
Use the same variable names and write the function F - Force(x-ks-kc-l.pdfUse the same variable names and write the function F - Force(x-ks-kc-l.pdf
Use the same variable names and write the function F - Force(x-ks-kc-l.pdf
 
Pointer
PointerPointer
Pointer
 
An Introduction to Part of C++ STL
An Introduction to Part of C++ STLAn Introduction to Part of C++ STL
An Introduction to Part of C++ STL
 
introduction to c programming and C History.pptx
introduction to c programming and C History.pptxintroduction to c programming and C History.pptx
introduction to c programming and C History.pptx
 
Rasterisation of a circle by the bresenham algorithm
Rasterisation of a circle by the bresenham algorithmRasterisation of a circle by the bresenham algorithm
Rasterisation of a circle by the bresenham algorithm
 
Rasterisation of a circle by the bresenham algorithm
Rasterisation of a circle by the bresenham algorithmRasterisation of a circle by the bresenham algorithm
Rasterisation of a circle by the bresenham algorithm
 
Maxima Finding problem In the 2-dimension space, we shall say that .pdf
Maxima Finding problem In the 2-dimension space, we shall say that .pdfMaxima Finding problem In the 2-dimension space, we shall say that .pdf
Maxima Finding problem In the 2-dimension space, we shall say that .pdf
 
[FT-11][suhorng] “Poor Man's” Undergraduate Compilers
[FT-11][suhorng] “Poor Man's” Undergraduate Compilers[FT-11][suhorng] “Poor Man's” Undergraduate Compilers
[FT-11][suhorng] “Poor Man's” Undergraduate Compilers
 
Building fast interpreters in Rust
Building fast interpreters in RustBuilding fast interpreters in Rust
Building fast interpreters in Rust
 
CPP Homework Help
CPP Homework HelpCPP Homework Help
CPP Homework Help
 
Go Programming Language (Golang)
Go Programming Language (Golang)Go Programming Language (Golang)
Go Programming Language (Golang)
 
Stacks,queues,linked-list
Stacks,queues,linked-listStacks,queues,linked-list
Stacks,queues,linked-list
 
Intro to Matlab programming
Intro to Matlab programmingIntro to Matlab programming
Intro to Matlab programming
 
The concept of stack is extremely important in computer science and .pdf
The concept of stack is extremely important in computer science and .pdfThe concept of stack is extremely important in computer science and .pdf
The concept of stack is extremely important in computer science and .pdf
 
week-23x
week-23xweek-23x
week-23x
 
Intel JIT Talk
Intel JIT TalkIntel JIT Talk
Intel JIT Talk
 
The Functional Programming Triad of Folding, Scanning and Iteration - a first...
The Functional Programming Triad of Folding, Scanning and Iteration - a first...The Functional Programming Triad of Folding, Scanning and Iteration - a first...
The Functional Programming Triad of Folding, Scanning and Iteration - a first...
 
Write a Matlab code (a computerized program) for calculating plane st.docx
 Write a Matlab code (a computerized program) for calculating plane st.docx Write a Matlab code (a computerized program) for calculating plane st.docx
Write a Matlab code (a computerized program) for calculating plane st.docx
 

More from info235816

2H2+O2=2H2O2 moles react with one mole of oxygen to give two moles.pdf
2H2+O2=2H2O2 moles react with one mole of oxygen to give two moles.pdf2H2+O2=2H2O2 moles react with one mole of oxygen to give two moles.pdf
2H2+O2=2H2O2 moles react with one mole of oxygen to give two moles.pdfinfo235816
 
10 = ( 1+ 65200 ) ^n10 = 1.0115 ^ (n)n = 201.37Solu.pdf
10 = ( 1+ 65200 ) ^n10 = 1.0115 ^ (n)n = 201.37Solu.pdf10 = ( 1+ 65200 ) ^n10 = 1.0115 ^ (n)n = 201.37Solu.pdf
10 = ( 1+ 65200 ) ^n10 = 1.0115 ^ (n)n = 201.37Solu.pdfinfo235816
 
1) Sugar because Payments to Labour has major component in Expenses .pdf
1) Sugar because Payments to Labour has major component in Expenses .pdf1) Sugar because Payments to Labour has major component in Expenses .pdf
1) Sugar because Payments to Labour has major component in Expenses .pdfinfo235816
 
(3)Solution(3).pdf
(3)Solution(3).pdf(3)Solution(3).pdf
(3)Solution(3).pdfinfo235816
 
Yellow fever is caused by the yellow fever virus and is spread by th.pdf
Yellow fever is caused by the yellow fever virus and is spread by th.pdfYellow fever is caused by the yellow fever virus and is spread by th.pdf
Yellow fever is caused by the yellow fever virus and is spread by th.pdfinfo235816
 
Values are so important in life. The values kids learn at a young ag.pdf
Values are so important in life. The values kids learn at a young ag.pdfValues are so important in life. The values kids learn at a young ag.pdf
Values are so important in life. The values kids learn at a young ag.pdfinfo235816
 
Time Value of Money (TVM) Value of the money does not remain same i.pdf
Time Value of Money (TVM) Value of the money does not remain same i.pdfTime Value of Money (TVM) Value of the money does not remain same i.pdf
Time Value of Money (TVM) Value of the money does not remain same i.pdfinfo235816
 
#include stdio.h #include stdlib.h int main() { int l1.pdf
#include stdio.h #include stdlib.h int main() { int l1.pdf#include stdio.h #include stdlib.h int main() { int l1.pdf
#include stdio.h #include stdlib.h int main() { int l1.pdfinfo235816
 
no it does not exists .pdf
                     no it does not exists                            .pdf                     no it does not exists                            .pdf
no it does not exists .pdfinfo235816
 
The shape is angular, because of the two pairs of.pdf
                     The shape is angular, because of the two pairs of.pdf                     The shape is angular, because of the two pairs of.pdf
The shape is angular, because of the two pairs of.pdfinfo235816
 
Step1 Moles of ions from NaCl = 2x.46 = .92 Step2.pdf
                     Step1 Moles of ions from NaCl = 2x.46 = .92 Step2.pdf                     Step1 Moles of ions from NaCl = 2x.46 = .92 Step2.pdf
Step1 Moles of ions from NaCl = 2x.46 = .92 Step2.pdfinfo235816
 
so that the reaction takes place and forms (=0 )b.pdf
                     so that the reaction takes place and forms (=0 )b.pdf                     so that the reaction takes place and forms (=0 )b.pdf
so that the reaction takes place and forms (=0 )b.pdfinfo235816
 
PV = nRT in both cases other one is taken consta.pdf
                     PV = nRT  in both cases other one is taken consta.pdf                     PV = nRT  in both cases other one is taken consta.pdf
PV = nRT in both cases other one is taken consta.pdfinfo235816
 
In case of SO2 Dipole - Dipole forces are stron.pdf
                     In case of SO2  Dipole - Dipole forces are stron.pdf                     In case of SO2  Dipole - Dipole forces are stron.pdf
In case of SO2 Dipole - Dipole forces are stron.pdfinfo235816
 
D) None of the above. first nitration takes place.pdf
                     D) None of the above. first nitration takes place.pdf                     D) None of the above. first nitration takes place.pdf
D) None of the above. first nitration takes place.pdfinfo235816
 
Decrease in Entropy (S) always occurs when the ph.pdf
                     Decrease in Entropy (S) always occurs when the ph.pdf                     Decrease in Entropy (S) always occurs when the ph.pdf
Decrease in Entropy (S) always occurs when the ph.pdfinfo235816
 
The diploid genetic system is more intersting than th monoploid gene.pdf
The diploid genetic system is more intersting than th monoploid gene.pdfThe diploid genetic system is more intersting than th monoploid gene.pdf
The diploid genetic system is more intersting than th monoploid gene.pdfinfo235816
 
Solution If r t= Multiply both the sides by -1 , we have= -.pdf
Solution  If r t= Multiply both the sides by -1 , we have= -.pdfSolution  If r t= Multiply both the sides by -1 , we have= -.pdf
Solution If r t= Multiply both the sides by -1 , we have= -.pdfinfo235816
 
Step-1Code the playing card and then sort the cards in a deck.Ste.pdf
Step-1Code the playing card and then sort the cards in a deck.Ste.pdfStep-1Code the playing card and then sort the cards in a deck.Ste.pdf
Step-1Code the playing card and then sort the cards in a deck.Ste.pdfinfo235816
 
conc . of H+ = conc. of H3O + = 0.250.17 =0.0425.pdf
                     conc . of H+ = conc. of H3O + = 0.250.17 =0.0425.pdf                     conc . of H+ = conc. of H3O + = 0.250.17 =0.0425.pdf
conc . of H+ = conc. of H3O + = 0.250.17 =0.0425.pdfinfo235816
 

More from info235816 (20)

2H2+O2=2H2O2 moles react with one mole of oxygen to give two moles.pdf
2H2+O2=2H2O2 moles react with one mole of oxygen to give two moles.pdf2H2+O2=2H2O2 moles react with one mole of oxygen to give two moles.pdf
2H2+O2=2H2O2 moles react with one mole of oxygen to give two moles.pdf
 
10 = ( 1+ 65200 ) ^n10 = 1.0115 ^ (n)n = 201.37Solu.pdf
10 = ( 1+ 65200 ) ^n10 = 1.0115 ^ (n)n = 201.37Solu.pdf10 = ( 1+ 65200 ) ^n10 = 1.0115 ^ (n)n = 201.37Solu.pdf
10 = ( 1+ 65200 ) ^n10 = 1.0115 ^ (n)n = 201.37Solu.pdf
 
1) Sugar because Payments to Labour has major component in Expenses .pdf
1) Sugar because Payments to Labour has major component in Expenses .pdf1) Sugar because Payments to Labour has major component in Expenses .pdf
1) Sugar because Payments to Labour has major component in Expenses .pdf
 
(3)Solution(3).pdf
(3)Solution(3).pdf(3)Solution(3).pdf
(3)Solution(3).pdf
 
Yellow fever is caused by the yellow fever virus and is spread by th.pdf
Yellow fever is caused by the yellow fever virus and is spread by th.pdfYellow fever is caused by the yellow fever virus and is spread by th.pdf
Yellow fever is caused by the yellow fever virus and is spread by th.pdf
 
Values are so important in life. The values kids learn at a young ag.pdf
Values are so important in life. The values kids learn at a young ag.pdfValues are so important in life. The values kids learn at a young ag.pdf
Values are so important in life. The values kids learn at a young ag.pdf
 
Time Value of Money (TVM) Value of the money does not remain same i.pdf
Time Value of Money (TVM) Value of the money does not remain same i.pdfTime Value of Money (TVM) Value of the money does not remain same i.pdf
Time Value of Money (TVM) Value of the money does not remain same i.pdf
 
#include stdio.h #include stdlib.h int main() { int l1.pdf
#include stdio.h #include stdlib.h int main() { int l1.pdf#include stdio.h #include stdlib.h int main() { int l1.pdf
#include stdio.h #include stdlib.h int main() { int l1.pdf
 
no it does not exists .pdf
                     no it does not exists                            .pdf                     no it does not exists                            .pdf
no it does not exists .pdf
 
The shape is angular, because of the two pairs of.pdf
                     The shape is angular, because of the two pairs of.pdf                     The shape is angular, because of the two pairs of.pdf
The shape is angular, because of the two pairs of.pdf
 
Step1 Moles of ions from NaCl = 2x.46 = .92 Step2.pdf
                     Step1 Moles of ions from NaCl = 2x.46 = .92 Step2.pdf                     Step1 Moles of ions from NaCl = 2x.46 = .92 Step2.pdf
Step1 Moles of ions from NaCl = 2x.46 = .92 Step2.pdf
 
so that the reaction takes place and forms (=0 )b.pdf
                     so that the reaction takes place and forms (=0 )b.pdf                     so that the reaction takes place and forms (=0 )b.pdf
so that the reaction takes place and forms (=0 )b.pdf
 
PV = nRT in both cases other one is taken consta.pdf
                     PV = nRT  in both cases other one is taken consta.pdf                     PV = nRT  in both cases other one is taken consta.pdf
PV = nRT in both cases other one is taken consta.pdf
 
In case of SO2 Dipole - Dipole forces are stron.pdf
                     In case of SO2  Dipole - Dipole forces are stron.pdf                     In case of SO2  Dipole - Dipole forces are stron.pdf
In case of SO2 Dipole - Dipole forces are stron.pdf
 
D) None of the above. first nitration takes place.pdf
                     D) None of the above. first nitration takes place.pdf                     D) None of the above. first nitration takes place.pdf
D) None of the above. first nitration takes place.pdf
 
Decrease in Entropy (S) always occurs when the ph.pdf
                     Decrease in Entropy (S) always occurs when the ph.pdf                     Decrease in Entropy (S) always occurs when the ph.pdf
Decrease in Entropy (S) always occurs when the ph.pdf
 
The diploid genetic system is more intersting than th monoploid gene.pdf
The diploid genetic system is more intersting than th monoploid gene.pdfThe diploid genetic system is more intersting than th monoploid gene.pdf
The diploid genetic system is more intersting than th monoploid gene.pdf
 
Solution If r t= Multiply both the sides by -1 , we have= -.pdf
Solution  If r t= Multiply both the sides by -1 , we have= -.pdfSolution  If r t= Multiply both the sides by -1 , we have= -.pdf
Solution If r t= Multiply both the sides by -1 , we have= -.pdf
 
Step-1Code the playing card and then sort the cards in a deck.Ste.pdf
Step-1Code the playing card and then sort the cards in a deck.Ste.pdfStep-1Code the playing card and then sort the cards in a deck.Ste.pdf
Step-1Code the playing card and then sort the cards in a deck.Ste.pdf
 
conc . of H+ = conc. of H3O + = 0.250.17 =0.0425.pdf
                     conc . of H+ = conc. of H3O + = 0.250.17 =0.0425.pdf                     conc . of H+ = conc. of H3O + = 0.250.17 =0.0425.pdf
conc . of H+ = conc. of H3O + = 0.250.17 =0.0425.pdf
 

Recently uploaded

JAPAN: ORGANISATION OF PMDA, PHARMACEUTICAL LAWS & REGULATIONS, TYPES OF REGI...
JAPAN: ORGANISATION OF PMDA, PHARMACEUTICAL LAWS & REGULATIONS, TYPES OF REGI...JAPAN: ORGANISATION OF PMDA, PHARMACEUTICAL LAWS & REGULATIONS, TYPES OF REGI...
JAPAN: ORGANISATION OF PMDA, PHARMACEUTICAL LAWS & REGULATIONS, TYPES OF REGI...anjaliyadav012327
 
Paris 2024 Olympic Geographies - an activity
Paris 2024 Olympic Geographies - an activityParis 2024 Olympic Geographies - an activity
Paris 2024 Olympic Geographies - an activityGeoBlogs
 
mini mental status format.docx
mini    mental       status     format.docxmini    mental       status     format.docx
mini mental status format.docxPoojaSen20
 
The byproduct of sericulture in different industries.pptx
The byproduct of sericulture in different industries.pptxThe byproduct of sericulture in different industries.pptx
The byproduct of sericulture in different industries.pptxShobhayan Kirtania
 
Call Girls in Dwarka Mor Delhi Contact Us 9654467111
Call Girls in Dwarka Mor Delhi Contact Us 9654467111Call Girls in Dwarka Mor Delhi Contact Us 9654467111
Call Girls in Dwarka Mor Delhi Contact Us 9654467111Sapana Sha
 
Q4-W6-Restating Informational Text Grade 3
Q4-W6-Restating Informational Text Grade 3Q4-W6-Restating Informational Text Grade 3
Q4-W6-Restating Informational Text Grade 3JemimahLaneBuaron
 
Measures of Dispersion and Variability: Range, QD, AD and SD
Measures of Dispersion and Variability: Range, QD, AD and SDMeasures of Dispersion and Variability: Range, QD, AD and SD
Measures of Dispersion and Variability: Range, QD, AD and SDThiyagu K
 
Interactive Powerpoint_How to Master effective communication
Interactive Powerpoint_How to Master effective communicationInteractive Powerpoint_How to Master effective communication
Interactive Powerpoint_How to Master effective communicationnomboosow
 
BASLIQ CURRENT LOOKBOOK LOOKBOOK(1) (1).pdf
BASLIQ CURRENT LOOKBOOK  LOOKBOOK(1) (1).pdfBASLIQ CURRENT LOOKBOOK  LOOKBOOK(1) (1).pdf
BASLIQ CURRENT LOOKBOOK LOOKBOOK(1) (1).pdfSoniaTolstoy
 
Ecosystem Interactions Class Discussion Presentation in Blue Green Lined Styl...
Ecosystem Interactions Class Discussion Presentation in Blue Green Lined Styl...Ecosystem Interactions Class Discussion Presentation in Blue Green Lined Styl...
Ecosystem Interactions Class Discussion Presentation in Blue Green Lined Styl...fonyou31
 
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...Sapna Thakur
 
microwave assisted reaction. General introduction
microwave assisted reaction. General introductionmicrowave assisted reaction. General introduction
microwave assisted reaction. General introductionMaksud Ahmed
 
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...EduSkills OECD
 
Grant Readiness 101 TechSoup and Remy Consulting
Grant Readiness 101 TechSoup and Remy ConsultingGrant Readiness 101 TechSoup and Remy Consulting
Grant Readiness 101 TechSoup and Remy ConsultingTechSoup
 
Advanced Views - Calendar View in Odoo 17
Advanced Views - Calendar View in Odoo 17Advanced Views - Calendar View in Odoo 17
Advanced Views - Calendar View in Odoo 17Celine George
 
Web & Social Media Analytics Previous Year Question Paper.pdf
Web & Social Media Analytics Previous Year Question Paper.pdfWeb & Social Media Analytics Previous Year Question Paper.pdf
Web & Social Media Analytics Previous Year Question Paper.pdfJayanti Pande
 
Software Engineering Methodologies (overview)
Software Engineering Methodologies (overview)Software Engineering Methodologies (overview)
Software Engineering Methodologies (overview)eniolaolutunde
 
Introduction to Nonprofit Accounting: The Basics
Introduction to Nonprofit Accounting: The BasicsIntroduction to Nonprofit Accounting: The Basics
Introduction to Nonprofit Accounting: The BasicsTechSoup
 

Recently uploaded (20)

JAPAN: ORGANISATION OF PMDA, PHARMACEUTICAL LAWS & REGULATIONS, TYPES OF REGI...
JAPAN: ORGANISATION OF PMDA, PHARMACEUTICAL LAWS & REGULATIONS, TYPES OF REGI...JAPAN: ORGANISATION OF PMDA, PHARMACEUTICAL LAWS & REGULATIONS, TYPES OF REGI...
JAPAN: ORGANISATION OF PMDA, PHARMACEUTICAL LAWS & REGULATIONS, TYPES OF REGI...
 
Paris 2024 Olympic Geographies - an activity
Paris 2024 Olympic Geographies - an activityParis 2024 Olympic Geographies - an activity
Paris 2024 Olympic Geographies - an activity
 
mini mental status format.docx
mini    mental       status     format.docxmini    mental       status     format.docx
mini mental status format.docx
 
Mattingly "AI & Prompt Design: Structured Data, Assistants, & RAG"
Mattingly "AI & Prompt Design: Structured Data, Assistants, & RAG"Mattingly "AI & Prompt Design: Structured Data, Assistants, & RAG"
Mattingly "AI & Prompt Design: Structured Data, Assistants, & RAG"
 
The byproduct of sericulture in different industries.pptx
The byproduct of sericulture in different industries.pptxThe byproduct of sericulture in different industries.pptx
The byproduct of sericulture in different industries.pptx
 
Call Girls in Dwarka Mor Delhi Contact Us 9654467111
Call Girls in Dwarka Mor Delhi Contact Us 9654467111Call Girls in Dwarka Mor Delhi Contact Us 9654467111
Call Girls in Dwarka Mor Delhi Contact Us 9654467111
 
Q4-W6-Restating Informational Text Grade 3
Q4-W6-Restating Informational Text Grade 3Q4-W6-Restating Informational Text Grade 3
Q4-W6-Restating Informational Text Grade 3
 
Measures of Dispersion and Variability: Range, QD, AD and SD
Measures of Dispersion and Variability: Range, QD, AD and SDMeasures of Dispersion and Variability: Range, QD, AD and SD
Measures of Dispersion and Variability: Range, QD, AD and SD
 
Interactive Powerpoint_How to Master effective communication
Interactive Powerpoint_How to Master effective communicationInteractive Powerpoint_How to Master effective communication
Interactive Powerpoint_How to Master effective communication
 
BASLIQ CURRENT LOOKBOOK LOOKBOOK(1) (1).pdf
BASLIQ CURRENT LOOKBOOK  LOOKBOOK(1) (1).pdfBASLIQ CURRENT LOOKBOOK  LOOKBOOK(1) (1).pdf
BASLIQ CURRENT LOOKBOOK LOOKBOOK(1) (1).pdf
 
Ecosystem Interactions Class Discussion Presentation in Blue Green Lined Styl...
Ecosystem Interactions Class Discussion Presentation in Blue Green Lined Styl...Ecosystem Interactions Class Discussion Presentation in Blue Green Lined Styl...
Ecosystem Interactions Class Discussion Presentation in Blue Green Lined Styl...
 
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
 
microwave assisted reaction. General introduction
microwave assisted reaction. General introductionmicrowave assisted reaction. General introduction
microwave assisted reaction. General introduction
 
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...
 
Grant Readiness 101 TechSoup and Remy Consulting
Grant Readiness 101 TechSoup and Remy ConsultingGrant Readiness 101 TechSoup and Remy Consulting
Grant Readiness 101 TechSoup and Remy Consulting
 
Código Creativo y Arte de Software | Unidad 1
Código Creativo y Arte de Software | Unidad 1Código Creativo y Arte de Software | Unidad 1
Código Creativo y Arte de Software | Unidad 1
 
Advanced Views - Calendar View in Odoo 17
Advanced Views - Calendar View in Odoo 17Advanced Views - Calendar View in Odoo 17
Advanced Views - Calendar View in Odoo 17
 
Web & Social Media Analytics Previous Year Question Paper.pdf
Web & Social Media Analytics Previous Year Question Paper.pdfWeb & Social Media Analytics Previous Year Question Paper.pdf
Web & Social Media Analytics Previous Year Question Paper.pdf
 
Software Engineering Methodologies (overview)
Software Engineering Methodologies (overview)Software Engineering Methodologies (overview)
Software Engineering Methodologies (overview)
 
Introduction to Nonprofit Accounting: The Basics
Introduction to Nonprofit Accounting: The BasicsIntroduction to Nonprofit Accounting: The Basics
Introduction to Nonprofit Accounting: The Basics
 

Floating point basicsThe core idea of floating-point representatio.pdf

  • 1. Floating point basics The core idea of floating-point representations (as opposed to fixed point representations as used by, say, ints), is that a number x is written as m*be where m is a mantissa or fractional part, b is a base, and e is an exponent. On modern computers the base is almost always 2, and for most floating-point representations the mantissa will be scaled to be between 1 and b. This is done by adjusting the exponent, e.g. 1 = 1*20 2 = 1*21 0.375 = 1.5*2-2 etc. Iam writing a program that does some floating addition that uses bit patterns with shifts applied to the mantissa and such to obtain the sum of the two floating point numbers. Logically and on paper I can get this to compute the correct sum. Code: #include #include #include #include int isNegative (float f) { unsigned int* iptr = (unsigned int*)&f; return ( ((*iptr) & 0x80000000) ? 1:0); } unsigned char getExponent (float f) { unsigned int* iptr = (unsigned int*)&f; return (((*iptr >> 23) & 0xff) - 127); } unsigned int getMantissa (float f) { unsigned int* iptr = (unsigned int*)&f; if( *iptr == 0 ) return 0; return ((*iptr & 0xFFFFFF) | 0x800000 );
  • 2. } float sum (float left, float right) { unsigned int littleMan; unsigned int bigMan; unsigned char littleE; unsigned char bigE; unsigned char lexp = getExponent(left); unsigned char rexp = getExponent(right); int Dexponent; if (lexp > rexp) { bigE = lexp; bigMan = getMantissa(left); littleE = rexp; littleMan = getMantissa(right); } else { bigE = rexp; bigMan = getMantissa(right); littleE = lexp; littleMan = getMantissa(left); } printf("little: %x %x ", littleE, littleMan); printf("big: %x %x ", bigE, bigMan); void shift( unsigned int *valToShift, int bitsToShift ) { // Masks is used to mask out bits to check for a "sticky" bit. static unsigned masks[24] = { 0, 1, 3, 7, 0xf, 0x1f, 0x3f, 0x7f,
  • 3. 0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff, 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff, 0x1fffff, 0x3fffff, 0x7fffff }; // HOmasks - masks out the H.O. bit of the value masked by the masks entry. static unsigned HOmasks[24] = { 0, 1, 2, 4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000 }; // shiftedOut- Holds the value that will be shifted out of a mantissa // during the denormalization operation (used to round a denormalized value). int shiftedOut; assert( bitsToShift <= 23 ); // Grabs the bits we're going to shift out (so we can determine // how to round this value after the shift). shiftedOut = *valToShift & masks[ bitsToShift ]; // Shift the value to the right the specified number of bits: *valToShift = *valToShift >> bitsToShift; // If necessary, round the value: if( shiftedOut > HOmasks[ bitsToShift ] ) { // If the bits we shifted out are greater than 1/2 the L.O. bit, then
  • 4. // round the value up by one. *valToShift = *valToShift + 1; } else if( shiftedOut == HOmasks[ bitsToShift ] ) { // If the bits we shifted out are exactly 1/2 of the L.O. bit's value, // then round the value to the nearest number whose L.O. bit is zero. *valToShift = *valToShift + ((*valToShift & 1) == 1); } // else we round the value down to the previous value. The current // value is already truncated (rounded down), so we don't have to do anything. } // I got two actual floating point values. I want to add them together. // 1. "denormalize" one of the operands if their exponents aren't // the same (when adding or subtracting values, the exponents must be the same). // // Algorithm: choose the value with the smaller exponent. Shift its mantissa // to the right the number of bits specified by the difference between the two // exponents. if( rexp > lexp ) { shift( &littleMan, (rexp - lexp)); Dexponent = rexp; } else if( rexp < lexp ) { shift( &littleMan, (lexp - rexp)); Dexponent = lexp; } unsigned int result = Dexponent; float fresult = *(float*)&result;
  • 5. return(fresult); } int main() { const int SIZE = 256; char line[SIZE]; while (1) { float f1; float f2; float left = f1; float right = f2; printf("Please enter the first float ( "q" to quit):"); fgets(line,SIZE,stdin); if (toupper(line[0]) =='Q') break; f1 = atof(line); printf("Please enter the second float ( "q" to quit):"); fgets(line,SIZE,stdin); if (toupper(line[0]) == 'Q') break; f2 = atof(line); if (isNegative(f1) || isNegative(f2)) printf ("One of thse is negative, but %g + %g == %g ", f1,f2,sum(f1,f2)); else printf("%g + %g == %g ", f1,f2,sum(f1,f2)); } return(EXIT_SUCCESS);
  • 6. 1 = 1*20 2 = 1*21 0.375 = 1.5*2-2 Solution Floating point basics The core idea of floating-point representations (as opposed to fixed point representations as used by, say, ints), is that a number x is written as m*be where m is a mantissa or fractional part, b is a base, and e is an exponent. On modern computers the base is almost always 2, and for most floating-point representations the mantissa will be scaled to be between 1 and b. This is done by adjusting the exponent, e.g. 1 = 1*20 2 = 1*21 0.375 = 1.5*2-2 etc. Iam writing a program that does some floating addition that uses bit patterns with shifts applied to the mantissa and such to obtain the sum of the two floating point numbers. Logically and on paper I can get this to compute the correct sum. Code: #include #include #include #include int isNegative (float f) { unsigned int* iptr = (unsigned int*)&f; return ( ((*iptr) & 0x80000000) ? 1:0); } unsigned char getExponent (float f) { unsigned int* iptr = (unsigned int*)&f; return (((*iptr >> 23) & 0xff) - 127); } unsigned int getMantissa (float f)
  • 7. { unsigned int* iptr = (unsigned int*)&f; if( *iptr == 0 ) return 0; return ((*iptr & 0xFFFFFF) | 0x800000 ); } float sum (float left, float right) { unsigned int littleMan; unsigned int bigMan; unsigned char littleE; unsigned char bigE; unsigned char lexp = getExponent(left); unsigned char rexp = getExponent(right); int Dexponent; if (lexp > rexp) { bigE = lexp; bigMan = getMantissa(left); littleE = rexp; littleMan = getMantissa(right); } else { bigE = rexp; bigMan = getMantissa(right); littleE = lexp; littleMan = getMantissa(left); } printf("little: %x %x ", littleE, littleMan); printf("big: %x %x ", bigE, bigMan); void shift( unsigned int *valToShift, int bitsToShift ) {
  • 8. // Masks is used to mask out bits to check for a "sticky" bit. static unsigned masks[24] = { 0, 1, 3, 7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff, 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff, 0x1fffff, 0x3fffff, 0x7fffff }; // HOmasks - masks out the H.O. bit of the value masked by the masks entry. static unsigned HOmasks[24] = { 0, 1, 2, 4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000 }; // shiftedOut- Holds the value that will be shifted out of a mantissa // during the denormalization operation (used to round a denormalized value). int shiftedOut; assert( bitsToShift <= 23 ); // Grabs the bits we're going to shift out (so we can determine // how to round this value after the shift). shiftedOut = *valToShift & masks[ bitsToShift ]; // Shift the value to the right the specified number of bits: *valToShift = *valToShift >> bitsToShift;
  • 9. // If necessary, round the value: if( shiftedOut > HOmasks[ bitsToShift ] ) { // If the bits we shifted out are greater than 1/2 the L.O. bit, then // round the value up by one. *valToShift = *valToShift + 1; } else if( shiftedOut == HOmasks[ bitsToShift ] ) { // If the bits we shifted out are exactly 1/2 of the L.O. bit's value, // then round the value to the nearest number whose L.O. bit is zero. *valToShift = *valToShift + ((*valToShift & 1) == 1); } // else we round the value down to the previous value. The current // value is already truncated (rounded down), so we don't have to do anything. } // I got two actual floating point values. I want to add them together. // 1. "denormalize" one of the operands if their exponents aren't // the same (when adding or subtracting values, the exponents must be the same). // // Algorithm: choose the value with the smaller exponent. Shift its mantissa // to the right the number of bits specified by the difference between the two // exponents. if( rexp > lexp ) { shift( &littleMan, (rexp - lexp)); Dexponent = rexp; } else if( rexp < lexp ) { shift( &littleMan, (lexp - rexp)); Dexponent = lexp;
  • 10. } unsigned int result = Dexponent; float fresult = *(float*)&result; return(fresult); } int main() { const int SIZE = 256; char line[SIZE]; while (1) { float f1; float f2; float left = f1; float right = f2; printf("Please enter the first float ( "q" to quit):"); fgets(line,SIZE,stdin); if (toupper(line[0]) =='Q') break; f1 = atof(line); printf("Please enter the second float ( "q" to quit):"); fgets(line,SIZE,stdin); if (toupper(line[0]) == 'Q') break; f2 = atof(line); if (isNegative(f1) || isNegative(f2))
  • 11. printf ("One of thse is negative, but %g + %g == %g ", f1,f2,sum(f1,f2)); else printf("%g + %g == %g ", f1,f2,sum(f1,f2)); } return(EXIT_SUCCESS); 1 = 1*20 2 = 1*21 0.375 = 1.5*2-2