SlideShare a Scribd company logo
C++0X Standard C++ Standard Library Extensions Technical Report 1 (TR1) Advanced C++ Runtime Improvement Techniques Gyuszi Suto November 2009
References ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
What’s in C++0x ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],/usr/intel/pkgs/icc/11.1.046e/bin/intel64/icc -std=c++0x -Wl,-rpath,/usr/intel/pkgs/gcc/4.4.0/lib64 /usr/intel/pkgs/gcc/4.4.0/bin/g++ -std=c++0x  in namespace  std::tr1::
Course Content ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
Type inference, decltype, auto ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
Type inference, decltype, auto ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
Lambda []  (int x) ->  int { return x; }  (10); capture clause empty – no local variables can be accessed = local variables accessed by value, not lvalue & local variables accessed by reference, lvalue formal parameters to lambda function specifies return type  (not req’d if the entire function body  is contained within one return statement) body of lambda function actual argument(s) passed to lambda function
Lambda int main(void) { // lambda function, no access to local variables auto lambdaf3 = [] (int i) { return i+3; }; cout << lambdaf3(4) << "\n"; // prints 7 // accesses local variable by reference (and modifies it) int localv = 9; auto lambdaf4 = [&] (int i) { return localv = i; }; cout << lambdaf4(7) << ", " << localv << endl; // modifies localv, prints 7, 7 int localv2 = 11; auto lambdaf5 = [] (int i) { return i + localv2; }; // error, don't know what localv2 is cout << lambdaf5(4) << endl;  int localv3 = 13; auto lambdaf6 = [=] (int i) { return i + localv3; }; // passing local context by value cout << lambdaf6(5) << endl; // prints 18 }
Lambda int main(void) { int localv3 = 13;  auto lambdaf7 = [=] (int i) { return localv3 += i; }; // error, localv3 is not a modifiable lvalue  int a = 3, b = 4; auto lambdaf8 = [=, &b] (int i) { return 3 + (b  += (a + i)); }; cout << lambdaf8(3) << ", " << b << endl; // accesses a by value, b by ref, prints 13, 10 array<int, 5> ia =  { 9, -3, 2, 13, -7}; // defines and uses a lambda in the context of a standard sort std::sort(ia.begin(), ia.end(),  [] (int a, int b) { return std::abs(a) < std::abs(b); } ); print_collection(ia); // prints 2 -3 -7 9 13 }
Lambda int pivot = 5;  // global variable int main(void) { array<int, 5> ia =  { 9, -3, 2, 13, -7}; // defines a named lambda function, used later inside sort auto lambdaf1 = [] (int a, int b) { return std::abs(a) < std::abs(b); }; std::sort(ia.begin(), ia.end(),  lambdaf1 ); print_collection(ia); // prints 2 -3 -7 9 13 // pivot global in this case std::sort(ia.begin(), ia.end(),  [] (int a, int b) { return std::abs(a - pivot) < std::abs(b - pivot); } ); print_collection(ia); // prints 2 9 -3 13 -7 int pivot2 = 8; // pivot2 local variable, captured by value std::sort(ia.begin(), ia.end(),  [pivot2] (int a, int b) { return std::abs(a - pivot2) < std::abs(b - pivot2); } );  print_collection(ia); // prints 9 13 2 -3 -7  return 0; }
Lambda class X{ public: int a, b;  // class member data // constructor X(int aa = 0, int bb = 2) : a(aa), b(bb) {} void memfun1 (void){ int c = 2, d = 3;  // local variables // lambda function definition and call [&, d] (int k) -> void {  cout << a << &quot;, &quot; << b << &quot;, &quot; << c << &quot;, &quot; << d << &quot;, &quot; << (c+=k) << endl; return; } (7); } void memfun2(void) { // named lambda function, accesses this auto lambda12 = [this] (int k) { return this + k; }; cout << lambda12(0xFF) << endl; // named lambda function, tries to access by reference, compiler warning ! auto lambda13 = [&this] (int k) -> X * { a += 3;  return this + k; }; cout << lambda13(0xEE) << endl; } } int main(void) { X x1; cout << &quot;in X&quot; << endl; x1.memfun1();  // prints 0, 2, 2, 3, 9 cout << x1.a << &quot; before the call&quot; << endl; // prints 0 x1.memfun2(); // prints 0x7fbfffcd00 cout << x1.a << &quot; after the call&quot; << endl; // prints 3 }
Lambda ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
Un-named lambda, named lambda and function  void foo() { int i, j; // local variables … need a function here, called once no other place needs it make it unnamed lambda it will know the context i, j … need a function here that knows the context  i, j, and it will be called here and in  other locations of this function make it a named lambda … need to call a function here this function may be called from other functions and it does not know about the context of this function foo() so make it an outside function xx() and call it here } // this function does not know about  // the stack frame of the caller // if the caller needs to pass its context, then // it needs to send it in via function arguments  void xx() { int k, l, m; … }
Template Traits  template <class T> class MMAX { public: enum {mmax = 10 }; }; template<> class MMAX<int> { public: enum {mmax = 1000 }; }; template <> class MMAX<float> { public: enum {mmax = 20000 }; }; template <class V, class TRAIT = MMAX<V> > class Luba{ public: V v[TRAIT::mmax]; }; int main(void) { Luba<int, MMAX<int> > myLuba; Luba<float> myLuba2; return 0; } template trait template default type argument relying on mmax enum of TRAIT Type V and type TRAIT are decoupled
Template Traits cont’d const char * table[] = { “ Unknown”,  // 0 “ Int”, // 1 “ Float”, // 2 “ X” // 3 }; template <class T> class Trait { public: static const int index = 0; }; template<> class Trait<int> { public: static const int index = 1; }; template<> class Trait<X> { public: static const int index = 3; }; template<class T, class R = Trait<T> > class W { public: T val; static const char * name () { return table[R::index] ; } }; … cout << W<int>::name();  // prints “Int”, relies on default argument of Trait<int> cout << W<X, Trait<X> >::name();   // prints “X” cout << W<char, Trait<char> >::name();  // prints “Unknown” cout << W<double, Trait<int> >::name(); // prints “Int” !! Template specialization Generic Template
Template Metaprogramming template<int N> class Factorial {  public:  enum { value = N * Factorial<N-1>::value };  };  template<> class Factorial<1> {  // template specialization for 1 public: enum { value = 1 };  }; … // evaluated at compile-time std::cout << Factorial<3>::value ; // prints 6 int a[Factorial<4>::value];  // 24  template <unsigned long N>  struct binary {  static unsigned const  value = binary<N/10>::value * 2 // prepend higher bits + N%10;  // to lowest bit  };  template <> // specialization  struct binary<0>  // terminates recursion  {  static unsigned const value = 0;  };  int b[binary<1101>::value]; // compile-time evaluation to 13 Done at compile-time
Variadic Templates #include <iostream> #include <typeinfo> template<int size, int... ints> // packing ints class X{ public: void foo(void) { int x[size] = {ints...}; // unpacking ints for(int i = 0; i < size; ++i){ std::cout << x[i] << " "; } std::cout << "\n"; } }; int main(void) { X<3, 8, 9, 10> x1;; // prints 8 9 10 X<4, 11, 12, 13, 14> x2;; // prints 11 12 13 14 return 0; }
Variadic Templates void myprintf(const char * s) {} // no-op template<typename T, typename... Args> void myprintf(const char* s, T value, Args... args) // args are packed { while (*s) { if (*s == '%' && *(++s) != '%') { std::cout << value; // args are unpacked below myprintf(s, args...); // call even when *s == 0 to detect extra arguments return; } std::cout << *s++; } } #if 0 %nm a.out | grep myprintf 00000000004009cb t _GLOBAL__I__Z8myprintfPKc 0000000000400b0c W _Z8myprintfIdIEEvPKcT_DpT0_ 0000000000400a7a W _Z8myprintfIfIdEEvPKcT_DpT0_ 00000000004009e0 W _Z8myprintfIiIfdEEvPKcT_DpT0_ 00000000004008ec T _Z8myprintfPKc  #endif int main(void) { int i = 5; float f = 6.6; double d = 9.9; myprintf(&quot;i=%  f=%  d=%&quot;, i, f, d); // prints i=5  f=6.6  d=9.9 myprintf(&quot;i=% f=%&quot;, i, f, d); // prints i=5  f=6.6  myprintf(&quot;i=% f=% d=% d=%&quot;, i, f, d); // prints i=5 f=6.6 d=9.9  return 0; }
Variadic Templates class X { int i; }; class Y { float f; }; class Z { double d; }; // forward declaration template <typename... E> struct TI; // template specialization for no type argument template <> struct TI<> { }; // general (and recursive) template class definition template <typename H, typename... T>  // packing type T struct TI<H, T...> : public H, public TI<T...> // unpacking type T {}; int main(void) { cout << sizeof(TI<X, Y, Z>) << endl; // prints 24 cout << sizeof(TI<vector<int>, X, Y>) << endl; // prints 32 return 0; } vector<int> X Y multiple inheritance This is an example of the building block of tuples (TupleImplement TI) using variadic templates. The example is just a small piece of the actual tuple implementation.
tuple<T0, T1, T2, .., T9>  the std::pair<T1, T2> on steroids pair<int, float> p(3, 9.9); cout << p.first << endl; // print 3 pair<int, pair<float, double> > pxx(5, make_pair(8.8, 11.11)); cout << pxx.second.first << endl; // print 8.8 tuple<int> ti(2); cout << get<0>(ti) << endl; // print 2 tuple<int, float, char> tifc(3, 9.9, 'c'); cout << get<2>(tifc) << endl; // print 'c' tuple<int, tuple<char, char, char>, float, int> txxx(3, make_tuple('c', 'd', 'e'), 13.33, 7); cout << get<2>(get<1>(txxx)) << endl; // print 'e' int char char float int char
tuple constructing The tuple constructor takes the tuple elements as arguments. For an n-element tuple, the constructor can be invoked with k arguments, where 0 <= k <= n. For example:  tuple<> t; // sizeof == 2 tuple <char> tc; // sizeof == 3 tuple <char, char> tcc; // sizeof == 3 tuple<int, int> t2; // sizeof == 12 tuple<int, int, int, int, int, int, int, int, int> t9; // sizeof == 40 If no initial value for an element is provided, it is default initialized (and hence must be default initializable). For example:  class X{ public: X::X(const string & s); // the only ct for X }; tuple<X, X, X> t4; // error, no default ct for X  tuple<X, X, X> t5(string("Jaba"), string("Daba"), string("Duu")); // ok
tuple constructing References must be constructed explicitly, for example:  tuple<double &> t(4.2); // error, cannot refer to a temporary value tuple<const double &> tdcr(8.0); // OK const tuple<double &> ctdr(8.0); // Error double d = 3.8; tuple<double &> t(d);  // OK cout << typeid(t).name(); // prints:  class std::tr1::tuple<double &, Nil, Nil, Nil, Nil, Nil, Nil, Nil, Nil> tuple<double &> t2(d + 0.2); // error  cannot initialize non-const reference with a temporary tuple<const double &> t3(d + 0.3); //  ok
make_tuple make_tuple – a more convenient way to create tuples no need to specify the types types are deduced to the plain, value-based, non-reference type   int i = 3; float f = 5.5; make_tuple(i, f); // makes a tuple<int,  float > make_tuple(8, 9.9); // makes a tuple<int, double> tuple<int, int, double> add_multiply_divide(int a, int b)  {  return make_tuple(a+b, a*b, double(a)/double(b));  // results in tuple<int, int, double> }  class A{}; class B{}; void foo(const A & a,  B & b)  { make_tuple(a, b);  // results in tuple< class  A,  class  B>  - types reduced to plain, non-ref type }
make_tuple, ref, cref // ref and cref are reference wrappers from <functional> header file - see later A a; B b;  const A ca = a; make_tuple(cref(a), b);  // creates tuple<const A&, B> make_tuple(ref(a), b);  // creates tuple<A&, B> make_tuple(ref(a), cref(b)); // creates tuple<A&, const B&> make_tuple(cref(ca));  // creates tuple<const A&> make_tuple(ref(ca));  // creates tuple< const  A&> char aa, bb; make_tuple(aa, bb);   // creates tuple<char, char> sizeof == 3 make_tuple(ref(aa), ref(bb)) ; // creates tuple<char &, char &>  sizeof == 12 int iii = 22, jjj = 100; cout <<  (get<0>(make_tuple(ref(iii), ref(jjj))) = 66)  << " iii " << iii << "\n"; // creates a tuple<int &, int &> // first element refers to iii  // prints 66 iii 66 int & int &
tuple, accessing elements ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
ties - tuples Ties are tuples, where all elements are of non-const reference types.  int i; char c; double d;  ... tie(i, c, d); The above tie function creates a tuple of type  tuple<int&, char&, double&>; The same result could be achieved with the call  make_tuple(ref(i), ref(c), ref(d)); A tuple that contains non-const references as elements can be used to 'unpack' another tuple into variables. e.g.:  char c; double d;  int i;  tie(i, c, d) = make_tuple(1,'a', 5.5); std::cout << i << " " <<  c << " " << d; // prints 1 a 5.5 d 0x8000 c 0x8004 i 0x8008 int & 0x8008 char & 0x8004 double & 0x8000 tie(i, c, d) = make_tuple(1,'a', 5.5);  int 1 char ‘a’ double 5.5 temporary (lvalue) temporary stack variables non-temporary
ties - tuples Ignore There is also an object called ignore which allows you to ignore an element assigned by a tuple. The idea is that a function may return a tuple, only part of which you are interested in. For example int i; char c  = ‘x’ ; double d;  tuple<int, char, double> tup(2, 'b', 6.6); tie(i, ignore, d) = tup; cout << i << &quot; &quot; <<  c << &quot; &quot; << d;  // prints 2 ‘x’ 6.6 d 0x8000 c 0x8004 i 0x8008 int & 0x8008 double & 0x8000 tie(i, ignore, d) = tup; int 2 char ‘b’ double 6.6 temporary (lvalue) stack variables non-temporary tup 0x800C stack variable // tying to a std::pair int i; char c  = ‘x’ ;  pair<int, char> pp(33, ‘w’); tie(i, c) = pp; // doesn’t compile (??) tie(i, c) = tuple<int, char>(pp); // works OK
tuples - performance All tuple access and construction functions are small inlined one-liners. Therefore, a decent compiler can eliminate any extra cost of using tuples compared to using hand written tuple like classes. Particularly, with a decent compiler there is no performance difference between this code: class hand_made_tuple {  A a; B b; C c; public: hand_made_tuple(const A& aa, const B& bb, const C& cc)  : a(aa), b(bb), c(cc) {}; A& getA() { return a; }; B& getB() { return b; }; C& getC() { return c; }; }; hand_made_tuple hmt(A(), B(), C());  hmt.getA(); hmt.getB(); hmt.getC(); and this code:   tuple<A, B, C> t(A(), B(), C()); t.get<0>(); t.get<1>(); t.get<2>();   There’s a memory overhead of 1-3 bytes per tuple – may be compiler and/or optimization level dependent Compiler error messages are very hard to understand!
Recap auto_ptr<T> ap0 X ap0 X ap1 ap0 X ap1 auto_ptr<X> ap0(new X()); auto_ptr<X> ap1; ap1 = ap0; // ap0 releases X // ap1 destructor // calls X destructor 0 0 One object can only be pointed to by max one auto pointer When the auto pointer goes out of scope, it deletes the object it points to Auto pointer was accepted hastily into the standard, the ANSI committee agrees it was a mistake ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
Usage of - and Issues with - auto_ptr<T> class X{…}; int main(void) { const auto_ptr<X> cap(new X(4)); auto_ptr<X> bb = cap; // compiler error, no copy semantics from const vector<auto_ptr<X> > xv; try{ auto_ptr<X> ap1(new X(3)); // ap1 points to an X xv.push_back(ap1); // compile error: no copy constructor for auto ptr { auto_ptr<X> ap3, ap4; // both point to null ap3 = ap1; // ap1.release() is called internally, ap3 points to X ++ap3; // compiler error, cannot increment auto pointer ap1->x = 7; // this throws !! ap4 = ap3; // ap3.release(), ap4 points to X X & xr = *ap4; // both ap4 and xr refer to same X X * xp = ap4; // compiler error X * xp2 = ap4.get(); // OK, uses auto_ptr.get() member function to get an X* } // ap4 goes out of scope, X is destroyed ap1->x = 3; // throws } catch (...) { cout << &quot;caught throw &quot;; } return 0; }
shared_ptr  shared_ptr<X> p0; auto_ptr<X> ap = new X; p0.unique() -> false (garbage?) p0.use_count() -> 0 (garbage?) p0 -> false p0 ap X 1 p0 = ap; p0.unique() -> true p0.use_count() -> 1 p0 -> true p0 ap X 2 shared_ptr<X> p1 = p0; X * xp = p0.get(); p0.unique() -> false p0.use_count() -> 2 xp not counted p0 -> true p0 p1 X 3 shared_ptr<X> p2 = p1; p0.unique() -> false p0.use_count() -> 3 p0 -> true p0 p1 X 4 p2 p0.reset(); p1.unique() -> false p1.use_count() -> 2 p1 -> true p0 p1 X 5 p2 p2.reset(new X); p1.unique() -> true p1.use_count() -> 1 p1 -> true p0 p1 X 6 p2 X p1.swap(p2); p1.unique() -> true p1.use_count() -> 1 p1 -> true p0 p1 X 7 p2 X p2 = p1; p1.unique() -> false p1.use_count() -> 2 p1 -> true p0 p1 X 8 p2 X destructor call // p0, p1 and p2 go out of // scope // p0 destructor // p1 destructor // X destructor  // finally p2 destructor p0 p1 9 p2 X destructor call xp
shared_ptr convertible pointers shared_ptr<X> p0(new X(33, 44)); p0.unique() -> true p0.use_count() -> 1 p0 X 1 shared_ptr<void> vp(p0); // X* is convertible to void* p0.use_count() -> 2 vp.use_count() -> 2 p0 vp X 2 p0.reset(); p0.use_count() -> 0 vp.use_count() -> 1 p0 vp X 3 vp.reset(); // vp knows what destructor to call // b/c the s_p<T> constructor is // templated on the argument (2 templates) p0.use_count() -> 0 vp.use_count() -> 0 p0 vp X 4 // X destructor shared_ptr<void> vp2(new X(1, 2)); vp2.use_count() -> 1 vp2 X 5 shared_ptr<void> vp3(vp2); vp2.use_count() -> 2 vp2 X 6 vp3 shared_ptr<X> p4(vp2); // Error vp2 has no type knowledge to allow construction of p4 vp2 X 7 vp3 p4 shared_ptr<X> p5(*(reinterpret_cast<shared_ptr<X> *>(&vp2))); // OK,ugly vp2 X 8 vp3 p5 vp2.use_count() -> 3
shared_ptr X _Ptr _Rep _Ref_cnt_base 4bytes 4bytes 16 bytes _Ptr _Rep 4bytes 4bytes _Ptr _Rep 4bytes 4bytes p0 p1 v0 X _Ptr _Rep _Ref_cnt_base 4bytes 4bytes 16 bytes p2 shared_ptr<X> p0(new X); shared_ptr<X> p1(p0); shared_ptr<void> v0(p0); shared_ptr<X> p2(new X); Note: 16 byte overhead per object, 4 byte overhead per pointer (may be compiler dependent) v0.get() returns _Ptr (X *)
shared_ptr<T>  pointing to objects on heap, stack, static memory class X{ public: X(const string &); // constructor X(const X &);  // copy constructor ~X();  // destructor }; // global object in static memory X xg2(&quot;def&quot;); // global shared X pointer in static memory shared_ptr<X> gsp;  // currently null void testSharedPtrToAuto(void) { shared_ptr<X> sp1(new X(&quot;abc&quot;));  // sp1 points to X on heap shared_ptr<X> sp2(&xg2);   // sp2 points to xg2, compiles but dangerous!! shared_ptr<X> sp3;   // sp3 set to null // sp3 = &xg2;  //  compiler  error X localX(“def”); // one X on stack // gsp = &localX;  // compiler error shared_ptr<X> sp4(&localX); // compiles, dangerous, 2 destructor calls to same object!! // when sp2 goes out of scope, it calls the destructor on xg2, bad, crash! }
shared_ptr<T> API // comes with an overhead of an extra integer // use it across multiple parties/modules, when there’s no clear ownership of shared data template<class T> class shared_ptr { public: typedef T element_type; shared_ptr(); // never throws // Constructs a  shared_ptr  that  owns  the pointer  p . template<class Y> explicit shared_ptr(Y * p);  //  Constructs a  shared_ptr  that  owns  the pointer  p  and the deleter  d .  template<class Y, class D> shared_ptr(Y * p, D d);  ~shared_ptr(); // never throws shared_ptr(shared_ptr const & r); // never throws template<class Y> shared_ptr(shared_ptr<Y> const & r); // never throws template<class Y> explicit shared_ptr(weak_ptr<Y> const & r); //  clarify how one type is converted to other template<class Y> explicit shared_ptr(std::auto_ptr<Y> & r); shared_ptr & operator=(shared_ptr const & r); // never throws  template<class Y> shared_ptr & operator=(shared_ptr<Y> const & r); // never throws template<class Y> shared_ptr & operator=(std::auto_ptr<Y> & r); void reset(); // never throws template<class Y> void reset(Y * p); template<class Y, class D> void reset(Y * p, D d); T & operator*() const; // never throws T * operator->() const; // never throws T * get() const; // never throws bool unique() const; // never throws long use_count() const; // never throws operator unspecified-bool-type() const; // never throws void swap(shared_ptr & b); // never throws };
Loops with shared pointers struct L { Data d; shared_ptr<M> mptr; }; struct M { Stuff s; shared_ptr<L> lptr; }; p1.use_count()  -> 2 lptr.use_count() -> 2 mptr.use_count() -> 1 p1 d mptr s lptr L M In case p1 goes out of scope, you lose your only handle to these two structures, and they are left in memory with  no way of ever freeing them! p1 d mptr s lptr L M p1.use_count()  -> 0 lptr.use_count() -> 1 mptr.use_count() -> 1
weak_ptr struct L { Data d; shared_ptr<M> mptr; }; struct M { Stuff s; weak_ptr <L> lptr; }; p1.use_count()  -> 1 lptr.use_count() -> 1 mptr.use_count() -> 1 p1 d mptr s lptr L M p1 d mptr s lptr L M p1.use_count()  -> 0 lptr.use_count() -> 0 mptr.use_count() -> 1 d mptr s lptr L M lptr.use_count() -> 0 mptr.use_count() -> 0 s lptr L M The  weak_ptr  class template stores a "weak reference"  to an object that's already managed by a  shared_ptr.
weak_ptr behavior shared_ptr<X> p1(new X); shared_ptr<X> p2 = p1; weak_ptr<X> w0 = p1; w0.unique() -> false w0.use_count() -> 2 w0 -> true p1 X 1 p2 w0 weak_ptr<X> w1 = w0; w0.unique() -> false w0.use_count() -> 2 p1 X 2 p2 w0 w1 // p1 goes out of scope w0.unique() -> true w0.use_count() -> 1 p1 X 3 p2 w0 w1 // w1 goes out of scope w0.unique() -> true w0.use_count() -> 1 X 4 p2 w0 w1 // p2 goes out of scope w0.unique() -> ?? w0.use_count() ->  0 w0.expired() -> true w0 -> false X 5 p2 w0 you cannot have a weak pointer pointing to an object that’s not pointed to by a shared_ptr w0 X this cannot happen A weak_ptr will never call the destructor of the object it points to. Use the weak pointer as an observer to data owned and managed by shared pointer(s)
weak_ptr behavior shared_ptr<X> p1(new X); weak_ptr<X> w0 = p1; p1.unique() -> true w0.use_count() -> 1 w0 -> true p1 X 1 w0 p1.unique() -> false w0.use_count() -> 2 p1 X 2 p3 w0 //shared_ptr<X> p2 = w0; // error shared_ptr<X> p3(w0); // explicit // or shared_ptr<X> p3 = w0.lock(); p1 X 3 p3 w0 shared_ptr<X> p4(new X);  p4 X p1 X 4 p3 w0 weak_ptr<X> w1 = p4;  p4 X w1 p1 X 5 p3 w0 p1.swap(p4);  p4 X w1 p1 X 5 p3 w0 w0.swap(w1);  p4 X w1 // w1.lock() == p3 == p4
weak_ptr API template<class T> class weak_ptr { public: typedef T element_type; weak_ptr(); template<class Y> weak_ptr(shared_ptr<Y> const & r); weak_ptr(weak_ptr const & r); template<class Y> weak_ptr(weak_ptr<Y> const & r); ~weak_ptr(); weak_ptr & operator=(weak_ptr const & r); template<class Y> weak_ptr & operator=(weak_ptr<Y> const & r); template<class Y> weak_ptr & operator=(shared_ptr<Y> const & r); long use_count() const; bool expired() const; shared_ptr<T> lock() const; void reset(); void swap(weak_ptr<T> & b); };
Recap C-style array const string def = "default_string"; class X{ public: string st; X(const string & s = def) : st(s) {} ~X() {} }; X c[3]; // initializes 3 default Xs on static memory int main(void) { int i[3]; // initializes 3 ints on stack memory X  m[3]; // initializes 3 default Xs on stack memory X p[3] = {"I", X(" am ")}; // "I" " am " "default_string" // 6 X destructor calls for m[] and p[] return 0; }  // after main terminates // 3 X destructor calls for c[] ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
Recap C-style arrays; Overloading  operators  new[]  and  delete[]  in a   plain old data class  (POD) class X{ public: int luba; static char BUF[10000]; static int cnt; void set(int ii) { luba = ii;} void * operator new (size_t sz){ void * retval = &BUF[cnt]; cnt += sz; return retval; } void * operator new [] (size_t sz){ void * retval = &BUF[cnt]; cnt += sz; return retval; } void operator delete(void * todel){ // currently is a no-op } void operator delete [] (void * todel){ // currently is a no-op } }; char X::BUF[10000]; int X::cnt = 0; int main(void) { X * x1 = new X; x1->set(1); X * xa = new X[3]; xa[0].set(4); xa[1].set(4); xa[2].set(4); X * xaa = new X[2]; xaa[0].set(7); xaa[1].set(7); delete x1; delete [] xa; delete [] xaa; return 1; } x1 1 4 4 4 7 7 BUF xa[0] xaa[0] New operator  calls operator new(4) New operator [3] calls operator new[](12) Heap memory
C++ Classes; Overloading  operators  new[]  and  delete[]  in a class with constructor (non POD) class X{ public: int luba; static char BUF[10000]; static int cnt; void set(int ii) { luba = ii;} void * operator new (size_t sz){ void * retval = &BUF[cnt]; cnt += sz; return retval; } void * operator new [] (size_t sz){ void * retval = &BUF[cnt]; cnt += sz; return retval; } void operator delete(void * todel){ // currently is a no-op } void operator delete [] (void * todel){ // currently is a no-op } X(int ii = 9) : luba(ii){} ~X(void) { luba = -1;} }; char X::BUF[10000]; int X::cnt = 0; int main(void) { X * x1 = new X; x1->set(1); X * xa = new X[3]; xa[0].set(4); xa[1].set(4); xa[2].set(4); X * xaa = new X[2]; xaa[0].set(7); xaa[1].set(7); delete x1; delete [] xa; return 1; } x1 1 3 0 4 4 4 2 0 7 7 BUF New operator [3] calls operator new[](20) Allocates from address A, but returns address B !! Number of elements in xa[] xa[0] Number of elements in xaa[] xaa[0] A B Heap memory
array<class T, size_t N> array<X, 3> a; // initializes 3 default Xs  array<X, 3> b{“I”, “am“}; // “I” “am” “default_string” array<X, 3> d(b); // copy constructor array<X, 3> e = {X(&quot;how&quot;), X(&quot;are&quot;), X(&quot;you?&quot;)}; array<X, 3> f = {&quot;I&quot;, &quot;am&quot;, &quot;fine.&quot;}; e.swap(f); array<X, 4> g; g.swap(f);  // compiler error, bad argument type f.swap(g);  // compiler error, bad argument type how are you? I am fine. e f default_string a default_string default_string I b am default_string I d am default_string g f cannot be swapped
array<class T, size_t N> It has all the standard iterators  T & front() T & back() iterator begin() iterator end() reverse_iterator rend() reverse_iterator rbegin()
array<class T, size_t N> array<X, 3> a = {“The”, “Seinfeld”, “show”};  a.assign(“blah”); // error, need X as argument a.assign(X(“yadda”)); // OK, set all 3 elements a[0]; // returns reference to elem 0 a[3]; // returns reference to non-existing elem – no bounds check a.at(0); // bounds-checked, returns reference to elem 0 a.at(3); // throws std::out_of_range exception a.size(); // returns 3 a.max_size(); // returns 3 (for a fixed-size array, same as size()) a.empty(); // return false array<X, 0> x; x.empty(); // returns true a[i];  // i is runtime variable The Seinfeld show yadda yadda yadda yadda yadda yadda a[0] a[3] yadda yadda yadda unchecked checked throws
array<class T, size_t N> array<X, 3> a = {“The”, “Seinfeld”, “show”};  // tuple-like element access get<1>(a); // return Seinfeld get<3>(a); // compile time error, out of bounds get<i>(a); // OK, if i has a compile-time value a.data(); // returns the address of a[0] // 2D array array<array<int, 3>, 4> myarr2d2; myarr2d2[3][2] = 3; // 3D array array<array<array<int, 3>, 12>, 33> my3Darray; // etc.
forward_list ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
forward_list ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
forward_list ,[object Object],[object Object]
forward_list ,[object Object]
rvalue reference ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
rvalue reference ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],ct char * a ct char * bb ct char * ccc ct char * dddd op + returning by value ct with size: 5 deep copy ct with size5 op + returning by value ct with size: 9 deep copy ct with size9 op = deep copy 3 deep copies!
rvalue reference ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],ct char * a ct char * bb ct char * ccc ct char * dddd op + returning by value ct with size: 5 copy ct shallow op + returning by value ct with size: 9 copy ct shallow shallow op= No deep copies !!
rvalue reference ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],a b tmp time a b tmp time
rvalue reference ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
rvalue reference ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
rvalue reference ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
Recap: Custom Comparators for STL containers struct Z{ int i, j; Z(int ii, int jj) : i(ii), j(jj) {} bool operator < (const Z & rs) const { if(i < rs.i) return true; if(i == rs.i) return (j < rs.j); return false; } }; void bar() { set<Z> xs; // using Z::op< by default xs.insert(Z(3, 4)); xs.insert(Z(3, 5)); xs.insert(Z(2, 6)); xs.insert(Z(2, 6)); // (2, 6)(3, 4)(3, 5) is the order // one element (2, 6) is dropped (==) } Conditions that the comparator must satisfy: Irreflexivity f(x, x) must be false.  Antisymmetry f(x, y) implies !f(y, x)  Transitivity f(x, y) and f(y, z) imply f(x, z). Equivalence  !f(x, y) and !f(y, x) implies x == y a < a  -> false a < b  -> !(b < a) a < b  && b < c  ->  a < c !(a < b) && !(b < a)  ->  a == b
Recap: Custom Comparators for STL containers struct Z{ int i, j; Z(int ii, int jj) : i(ii), j(jj) {} }; class ZComp : public binary_function<const Z &, const Z &, bool> { public: bool operator() (const Z & l, const Z & r) { if(l.i < r.i) return true; if(l.i == r.i) return (l.j < r.j); return false; } }; void bar() { set<Z, ZComp> xs; xs.insert(Z(3, 4)); xs.insert(Z(3, 5)); xs.insert(Z(2, 6)); xs.insert(Z(2, 6)); // (2, 6)(3, 4)(3, 5) is the order // one element (2, 6) is dropped (==) } ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],// an uglier alternative bool zff(const Z & l, const Z & r) { if(l.i < r.i) return true; if(l.i == r.i) return (l.j < r.j); return false; }  void bar() { set<Z, bool (*) (const Z &, const Z &)> xs(zff); } function pointer type declaration the function  pointer
Recap: More sophisticated comparator predicates class MyintComparator : public binary_function<int, int, bool> { int referenceArgument_; public: explicit MyintComparator(int  refArg) : referenceArgument_(refArg) {} bool operator () (int l, int r) { return std::abs(l - referenceArgument_) < std::abs(r - referenceArgument_); } }; void foo() { std::list<int> myList; myList.push_back(2); myList.push_back(11); myList.push_back(8); myList.sort(MyintComparator(10)); // 11, 8, 2 std::set<int, MyintComparator> mySet(MyintComparator(7)); mySet.insert(2); mySet.insert(11); mySet.insert(8); // 8, 11, 2 // you can also do this with boost bind, see later return; } 2  3  4  5  6  7  8  9 10 11 12 2  3  4  5  6  7  8  9 10 11 12
Recap: Mistakes in op < // Bad example struct Z{ int i, j; Z(int ii, int jj) : i(ii), j(jj) {} bool operator < (const Z & rs) const { if(i > rs.i || j > rs.j) return false; return true; } }; Z z1(2, 3), z2(3, 2); // z1 < z2  -> true // z2 < z1  -> true //  // Does not satisfy antisymmetry // and Equivalence //  // You can have all kinds of issues: //  // not finding an inserted element // not getting things back in the correct // order // multiple copies inserted in a set, etc. Say you have a map, which is a red-black tree internally. The tree has the following elements in it: A /  B  C and you are about to insert element D. The following comparisons will be done: D < A A < D  if both are false then it means equality and D is inserted over A if D < A then it will go on the left branch and perform 2 more op< D < B B < D  if both are false, then it inserts D over B (or just drops D), if not, it will make further branches left or right from B If the op< is incorrect, then you may insert D and never find it in the set
Recap: Mistakes in op < // Bad example struct Z{ int i, j; Z(int ii, int jj) : i(ii), j(jj) {} }; class BadZComp : public binary_function<const Z &, const Z &, bool> { public: bool operator() (const Z & l, const Z & r) { return (l.i < r.i || l.i < r.j); // faulty implementation } }; int main(void) { //set<Z> xs; set<Z, BadZComp> xs; xs.insert(Z(3, 4)); xs.insert(Z(3, 5)); xs.insert(Z(2, 6)); set<Z, BadZComp>::iterator sit = xs.find(Z(3, 4)); return 0; } In order to find element Z(3,4) in the set the operator < needs to return  !(a < b) && !(b < a) Z(3, 4) < Z(3, 4) will return true, therefore the element will never be found in the set
Unordered associative containers: unordered_map, unordered_multimap, unordered_set, unordered_multiset class WWID { public: int wwid; WWID(int i = -1) : wwid(i) {} bool operator == (const WWID & rs) const {   return wwid == rs.wwid; } }; class ShoeSize { public: int shoeSize; ShoeSize(int i = 4) : shoeSize(i) {} bool operator == (const ShoeSize & rs) const {   return shoeSize == rs.shoeSize; } }; // Notice: no operator < () required on the types above – no ordering is happening struct MyHash { // return values 0, 1 or 2 size_t operator() (const WWID & key) const { return key.wwid % 3; }   }; These four types implement the hash tables in TR1 The name hash_table was widely used in legacy code, hence these new names These types are unordered, no operator < is required on them Operator == is required on the key The unordered_map and unordered_set keep unique copies of elements The unordered_multimap and unordered_multiset keep multiple elements with matching keys – lumped together in groups
unordered_map const MyHash myhash; // one object // key is WWID, type is ShoeSize unordered_map<WWID, ShoeSize, MyHash> map2( 3,  myhash);  // need 3 buckets, use myhash cout <<  map2.bucket_count() ;  // prints 8 (upsizes to power of 2 >= 8) cout << map2.max_bucket_count() ; // prints 8 cout << map2.bucket_size(0);  // prints 0 cout << map2.size();  // prints 0 – number of elements in the unordered_map 0 1 2 3 4 5 6 7 bucket_count() max_bucket_count() bucket_size(0) == 0
unordered_map 0 (0, 5) 1 2 3 4 5 6 7 map2.insert(pair<WWID, ShoeSize>(WWID(0), ShoeSize(5))); // bs(0):1  bs(1):0 bs(2):0 size:1 b_cnt:8 max_b_cnt:8 map2.insert(make_pair(WWID(1), ShoeSize(6)));   // bs(0):1  bs(1):1 bs(2):0 size:2 b_cnt:8 max_b_cnt:8 map2.insert(pair<WWID, ShoeSize>( 2, 7 ));   // bs(0):1  bs(1):1 bs(2):1 size:3 b_cnt:8 max_b_cnt:8 map2.insert(pair<WWID, ShoeSize>( 3, 8 ));   // bs(0):2  bs(1):1 bs(2):1 size:4 b_cnt:8 max_b_cnt:8 (1, 6) (2, 7) (3, 8) Due to the implementation of the  MyHash::operator()   we will only insert elements in the first 3 buckets, even though the bucket_count of the map is 8  bucket_count()
unordered_map 0 (0, 5) 1 2 3 4 5 6 7 struct MyHash2 { // return values 0 thru 8 size_t operator() (const WWID & key) const { return key.wwid % 9; }   }; MyHash2 myhash2; unordered_map<WWID, ShoeSize, MyHash2> map2( 3,  myhash2);  // need 3 buckets, use myhash2 map2.insert(pair<WWID, ShoeSize>(WWID(0), ShoeSize(5)));  // bs(0):1  bs(1):0 bs(2):0 bs(7):0 size:1 map2.insert(make_pair(WWID(1), ShoeSize(6)));   // bs(0):1  bs(1):1 bs(2):0 bs(7):0 size:2 map2.insert(pair<WWID, ShoeSize>( 2, 7 ));   // bs(0):1  bs(1):1 bs(2):1 bs(7):0 size:3 map2.insert(pair<WWID, ShoeSize>( 7, 12 ));   // bs(0):1  bs(1):1 bs(2):1 bs(7):1 size:4  map2.insert(pair<WWID, ShoeSize>( 8, 13 ));   // bs(0):2  bs(1):1 bs(2):1 bs(7):1 size:5  (1, 6) (2, 7) (8, 13) bucket_count() trying to insert (8, 13) in bucket 8  - unsuccessfully, inserted here instead  the hash function returns 8 for (8, 13) but the value is truncated to limit it to buckets 0 thru 7 value &= 0x07; // done automatically for you // having the bucket_count at power of 2  // comes in handy here (7, 12) 8
unordered_map 0 (0, 5) 1 2 3 4 5 6 7 cout << map2.load_factor();  // average load factor for a bucket; prints .625 (size()/bucket_count()) cout << map2.max_load_factor();  // prints 4 map2.max_load_factor(0.5); // sets  new target load factor map2.rehash(8); // rehash such that the load factor does not exceed target load factor, add new buckets if needed map2.max_load_factor(4); // sets target load factor back to 4 map2.rehash(8); // rehash, get back to original state, get at least 8 buckets (1, 6) (2, 7) (8, 13) bucket_count() == 8 (7, 12) 0 (0, 5) 1 2 3 4 5 6 7 (1, 6) (2, 7) bucket_count() == 16 (7, 12) 8 (8, 13) 15 max_load_factor(0.5) rehash(8) load_factor() == 0.625 (5/8) load_factor() == 0.3125 (5/16) … max_load_factor(4) rehash(8)
unordered_map 0 (0, 5) 1 2 3 4 5 6 7 struct MyHash { // return values 0, 1 or 2 size_t operator() (const WWID & key) const { return key.wwid % 3; }   }  myhash; unordered_map<WWID, ShoeSize, MyHash> map2( 3,  myhash); // insert the same 5 elements into map2 cout << map2.load_factor();  // average load factor for a bucket; prints .625 (size()/bucket_count()) cout << map2.max_load_factor();  // prints 4 map2.max_load_factor(0.5); // sets  new target load factor map2.rehash(8); // rehash such that the load factor does not exceed target load factor, add new buckets if needed (1, 6) (2, 7) (8, 13) bucket_count() == 8 (7, 12) rehash(8) load_factor() == 0.625 (5/8) 0 (0, 5) 1 2 3 4 5 6 7 (1, 6) (2, 7) (8, 13) bucket_count() == 16 (7, 12) load_factor() == 0.3125 (5/16) 15 elements are not moved to different buckets because the hash function returns only values  0, 1, 2 we did get a lower load factor but it’s not useful, all elements will be in buckets 0,1,2
unordered_map (0, 5) (1, 6) (2, 7) (8, 13) (7, 12) (12, 7) (0, 5) (1, 6) (2, 7) (8, 13) (7, 12) (12, 7) map2.begin() internally implemented as one  big list (14, 6) (11, 17) (33, 5) (14, 6) (11, 17) (33, 5) map2.end() map2.begin(0) map2.begin(1) map2.end(0) map2.end(2) map2.end(1) map2.begin(2) map2.begin(3) map2.end(3) unordered_map<WWID, ShoeSize, MyHash> ::iterator unordered_map<WWID, ShoeSize, MyHash> ::local_iterator 0 1 2 3 0 1 2 3
unordered_map 0 (0, 5) 1 2 struct MyHash { // return values 0, 1 or 2 size_t operator() (const WWID & key) const { return key.wwid  /   10 ; }   }  myhash; unordered_map<WWID, ShoeSize, MyHash> map2( 8,  myhash); // insert (12,7), (0,5), (7,12), (1, 6), (8,13), (2,7) map2.insert(make_pair(WWID( 2 ), ShoeSize( 7 )));  // attempting to re-insert (2,7) (1, 6) (2, 7) (8, 13) (7, 12) (12, 7) 0 (0, 5) 1 2 (1, 6) (2, 7) (8, 13) (7, 12) (12, 7) _list.begin() _list.end() internally implemented as one list; when we try to insert (2,7) for the second time, it searches backward within the list-portion of bucket 0 searches this portion of list the insert attempt returns an iterator pointing to original (2,7) paired with a bool false denoting the fact that the element was already in the map
unordered_map 0 (0, 5) 1 2 // returns an iterator to element (8, 13) unordered_map<WWID, ShoeSize, MyHash> ::iterator mit = map2.find(8);  // passing mit as hint iterator to speed up insertion map2.insert( mit,  make_pair(WWID( 1), ShoeSize(7))); (1, 6) (2, 7) (8, 13) (7, 12) (12, 7) mit backit forwit compiler dependent usage of hint iterator mit could do 2 finger approach to search away from mit (Dinkumware didn’t use the hint though..) will find already existing element (1, 6)  and skip re-inserting returns pair (result, false) – the keys of (1, 6) and (1, 7) match, insert will look for key 1, will find (1, 6) and not insert (1,7) over (1,6) result
unordered_multiset 0 1 2 unordered_multiset<WWID, MyHash> set1; set1.insert(2); set1.insert(3); set1.insert(2); set1.insert(3); set1.insert(11); unordered_multiset<WWID, MyHash>::iterator i = set1.find( 2 );  // find one element with matching value // return an iterator pair ranging all the elements with matching values pair<unordered_multiset<WWID, MyHash>::iterator, unordered_multiset<WWID, MyHash>::iterator> itpair = set1.equal_range(3); for(unordered_multiset<WWID, MyHash>::iterator i1 = itpair.first; i1 != itpair.second; ++i1){ cout << "i1 " << *i1 << endl; }   set1.insert(itpair.first, 3);  // inserting elem 3 for the 3rd time, passing a hint iterator to speed up insertion cout << set1.count(3); // prints 3 3 11 3 2 2 i itpair.first itpair.second ++ ++ 3
time complexity of unordered associative containers X a(n, hash_fun);  // construct a container with at least n buckets, O(n) X a(first, last, n, hash_fun); // construct a container with at least n buckets, then insert elements in the  // iterator range into the container; average O(N), worst O(N^2)  - N is the number of elements inserted a.insert(object);  // average O(1), worst O(a.size()) a.insert(hint_iterator, object); // average O(1), worst O(a.size()) a.erase(key); // erases element with matching key; average O(a.count(key)), worst O(a.size()) a.erase(it1, it2); // erases elements between the 2 iterators; average O(distance(it1, it2)), worst O(a.size()) a.clear(); // removes all elements; O(a.size()) a.find(k); // find element with matching key; average O(1), worst O(a.size()) a.count(k); // count elements with matching key; average O(1), worst O(a.size()) a.equal_range(k); // return a pair of iterators spanning the range with matching key; average O(a.count()), worst O(a.size()) a.bucket_size(n); // O(M) where M is the number of objects in bucket n the ordered associative containers set, multiset, map, multimap use a red-black tree for their implementation their insertion and query times are O(log(N)) the unordered associative containers have average times of insertion and query of O(1) – but it needs careful design and babysitting – or else they can deteriorate to O(n)
mem_fn (member function) Function Template // File #include <iostream.h> void g(int i) { cout << i ;} class XYZ{ public: int i; void luba(int ii); // non-static member function }; void XYZ::luba(int ii) { i = ii; cout << this << “,” << i << endl; } /* File t2.c  */ #include <stdio.h> /* declare a mangled C++ global function */ void g__Fi(int); /* declare a mangled C++ member function */ void luba__3XYZFi(void *, int); void f(void) { int j = 66;  g__Fi(j); /* will call g() from */ void * vp = malloc(100); /* will call void XYZ::luba(int); */ luba__3XYZFi(vp, j); /* passing vp to */ /* become this pointer ! */ } // nm t1.o will report: // Name  value  scope  type  subspace // g__Fi  |  0|  extern| entry| $CODE$ // luba__3XYZFi|  0|  extern| entry| $CODE$ /* nm t2.o will report */ /* f  |  0| extern| entry| $CODE$ */ /* g__Fi  |  |  undef|  code| */ /* luba__3XYZFi|  |  undef|  code| */ C++ file C file Example of calling C++ member function from C code. Educational purpose only. Do not do this at home!
mem_fn (member function) Function Template class AA{ int i; public: AA(int j = 0) : i(j) {} static int f0(double d) { return static_cast<int>(d);} int f1(double d) const  {return static_cast<int>(d * i);} const AA operator + (const AA & rs) const  { AA retval; retval.i = i + rs.i; return retval; }  int geti(void) const { return i; } }; int foo(const AA & a) { return a.geti();} void mem_fn_test(void) { int (* fooptr) (const AA &) = foo;   // just a pointer to a global function int (AA:: * f1ptr)(double)const = &AA::f1; // pointer to a member function of AA // pointer to a member operator of AA const AA (AA:: * operatorPlusPtr)(const AA &) const = &AA::operator +;  int (* f0ptr)(double)  = &AA::f0; // pointer to static member function AA a1(13); AA * a2 = new AA(14); shared_ptr<AA> a3(new AA(15)); weak_ptr<AA> a4 = a3; // continued on next page
mem_fn (member function) Function Template // continued from prev page mem_fn(f1ptr)(a1, 9.9); // equivalent to a1.f1(9.9); mem_fn(f1ptr)(a2, 11.3); // equivalent to a2->f1(11.3);  - notice that pointers and non-pointers mem_fn(f1ptr)(a3, 13.5); // equivalent to a3->f1(13.5);  - are handled with same syntax // mem_fn(f1ptr)(a4, 15.1); // equivalent to a4->f1(15.1); // doesn't compile (f0ptr)(8.8); // ok  – calling a static member function, does not need an object address for this (f1ptr)(8.8); // error - term does not evaluate to a function taking 1 argument AA a5 = mem_fn(operatorPlusPtr)(a2, a1);  // equivalent to a5 = (*a2) + a1 AA a6 = mem_fn(operatorPlusPtr)(a1, *a2); // equivalent to a6 = a1 + (*a2) AA a7 = mem_fn(operatorPlusPtr)(a2, *a3); // equivalent to a7 = (*a2) + (*a3) AA a8 = mem_fn(operatorPlusPtr)(a3, *a2); // equivalent to a8 = (*a3) + (*a2) }
The reference_wrapper Class Template class RR{  // sizeof(RR) == 4 public: int & ir; RR(int & j) : ir(j) {} }; int b; // some global int class PP{  // sizeof(PP) == 4 public: reference_wrapper<int> irw; PP(int & k)  : irw(k) { irw = b; // error } }; void test_reference_wrapper(void) { int ii(11), jj(12); RR r1(ii), r2(jj); r1 = r2;  // error, don’t know how to copy references PP p1(ii), p2(jj); cout << p1.irw.get() ; // prints 11 p1 = p2; cout << p1.irw.get() ; // prints 12 }
The reference_wrapper Class Template class AA{ int i; public: AA(int j = 0) : i(j) {} static int f0(double d) { return static_cast<int>(d);} int f1(double d) const  {return static_cast<int>(d * i);} const AA operator + (const AA & rs) const  { AA retval; retval.i = i + rs.i; return retval; }  int geti(void) const { return i; } }; int foo(const AA & a) { return a.geti();} int bar(const AA & a) { return a.geti() + 2; } void test_reference_wrapper2(void) { typedef int (* const myfuntype) (const AA &); reference_wrapper<myfuntype> fooref(&foo); //  creating a reference wrapper to foo() reference_wrapper<int (* const) (const AA &)>  barref(&bar) ; //  ref wrapper to bar() AA a1(12); fooref(a1); // calling foo barref(a1); // calling bar /* supposed to work but doesn't  fooref = cref(bar); fooref(a1); // calling bar */ reference_wrapper<int (AA:: * const)(double)const> memref(&AA::f1); mem_fn(memref.get())(a1, 8.7); // calling a member function via the reference wrapper }
The function Class Template class AA{ int i; public: AA(int j = 0) : i(j) {} }; int foo(const AA & a) { cout << "foo" << endl; return a.geti();} int bar(const AA & a) { cout << "bar" << endl; return a.geti();} void duba(void) { cout << "in dubai"; } void test_function(void) { function<void ()> fd(duba);  // constructing a function object fd(); // call duba typedef int (myfuntype) (const AA &);  // typedef of a function (not a function pointer) function<myfuntype> fun1; // defining empty function AA a1(12); fun1 = foo; // assigning a global function to fun1 fun1(a1); // calling fun1 with argument a1 fun1 = bar; //  fun1(a1); // calling bar }
The function Class Template class YY{ public: int _y; YY(int y) : _y(y) {} int operator () () const { cout << &quot;In YY::op(), this: &quot; << hex << this << dec << &quot;&quot;; return  0 ; } }; int yme() {   cout << &quot;In yme&quot; << endl;   return 5;} void fxx(void) {} void test_function(void) {  YY y1(13); // one object function<int ()> fun2;  // function object returning int, taking no arguments fun2 = y1; // copy of y1 made fun2(); // call y1.op()  – operates on a copy of y1 – 13 y1._y = 55; // modifying y1 fun2(); // still operates on the original copy of y1 – 13 (not 55 !) cout << &quot;sizeof fun2 &quot; << sizeof(fun2) << endl; //  prints  4 fun2 = YY(33); cout << (bool)(<YY>()) << endl;  // true cout << (bool)(<int ()>()) << endl; // false fun2(); // calling YY::op() on another object  cout << &quot;fun2.target_type().name() &quot; << fun2.target_type().name() << endl; // prints class YY // fun2.target_type() returns a reference to type_info object fun2 = yme; fun2(); // calling yme cout << &quot;fun2.target_type().name() &quot; << fun2.target_type().name() << endl; // prints int (__cdecl *)(void) cout << &quot;<YY> &quot; << (bool)(<YY>()) << endl; // false cout << &quot;<yme> &quot; << (bool)(<int ()>()) << endl; // true ?? why not?? //fun2 = f xx ; // error , non-matching type }
The function Class Template void test_function(void) {  YY y3(33), y4(44); function<int ()> fun3, fun4; cout << "about to fun3 = y3" << endl; fun3 = y3;  // y3 copied by value cout << "about to fun4 = y4" << endl; fun4 = y4;  // y4 copied by value cout << "calling fun3()" << endl; fun3(); cout << "calling fun4()" << endl; fun4(); cout << "fun3 = 0" << endl; fun3 = 0;  // setting fun3 to zero fun3.swap(fun4);  // swapping the 2 functions, fun3 holds copy of y4, fun4 is zero if(fun3) {cout << "calling fun3()" << endl; fun3(); }  // executed on copy of y4 if(fun4) {cout << "calling fun4()" << endl; fun4();}  // if(fun4) evaluates to false, no call }
The bind Function Template // Recap std:: bind1st, bind2nd // creating a predicate class myLessThan : public binary_function<int, int, bool> { public: bool operator () (int a, int b) const { return a < b; } }; bool myLessThan3(int a) { return a < 3; } void test_std_bind() { array<int, 4> arr = {1, 2, 3, 4}; int k ; count_if(arr.begin(), arr.end(), myLessThan3, k);  //  sets k to 2 count_if(arr.begin(), arr.end(), bind2nd(myLessThan(), 3), k); //  (*it < 3) sets k to 2 count_if(arr.begin(), arr.end(), bind1st(myLessThan(), 3), k); //  (3 < *it) sets k to 1 }
The bind Function Template bool myLessThan3(int a) { return a < 3; } bool myLessThanF(int a, int b) { return a < b; } using namespace std::tr1::placeholders;  // for _1, _2, etc void test_bind() { bind(myLessThan3, _1)(4);  //  4 < 3 false bind(myLessThan3, 4)();  // 4 < 3 false bind(myLessThan3, 2)();  // 2 < 3 true bind(myLessThanF,  4,  5 )();  //  myLessThanF (4, 5)  returns true bind(myLessThanF,  4, _1)(5);  //  myLessThanF (4, 5)  returns true bind(myLessThanF, _1, _2)(4, 5);  //  myLessThanF (4, 5)  returns true bind(myLessThanF, _2, _1)(4, 5);  //  myLessThanF ( 5, 4 )  returns false  bind(myLessThanF, _1,  5)(4);  //  myLessThanF (4, 5)  returns true // using runtime values int val1; cout << "val1: "; cin >> val1; int val2; cout << "val2: "; cin >> val2; b = bind(myLessThanF, val1, _1)(val2);  // will perform myLessThanF(val1, val2) }
The bind Function Template bool myLessThanF(int a, int b) { return a < b; } void test_bind() { array<int, 4> arr = {1, 2, 3, 4}; int k = 0; count_if(arr.begin(), arr.end(), bind(myLessThanF, 3, _1), k);  // k set to 1   //  3<1f  3<2f  3<3f  3<4t  //  1  2  3  4 count_if(arr.begin(), arr.end(), bind(myLessThanF, _1, 3), k);  // k set to 2 //  1<3t  2<3t  3<3f  4<3f  //  1  2  3  4 }
The bind Function Template bool inBetweenF(int l, int m, int r) { return (l <= m && m <= r); } void test_bind() { inBetweenF(7, 8, 9) ;  //  call (7, 8, 9)  ->  true  bind(inBetweenF, _1, _2, _3)(7, 8, 9) ;  //  call (7, 8, 9)  ->  true  bind(inBetweenF, _1, _3, _2)(7, 8, 9) ;  // call (7, 9, 8)  ->  false  bind(inBetweenF, 7, _1, _2)   (8, 9) ;  //  call (7,  8, 9 )  ->  true  bind(inBetweenF, 7, 7, _1)   (8) ;  //  call (7, 7, 8)  ->  true  bind(inBetweenF, 7, _1, 8)   (7) ;  //  call (7, 7, 8)  ->  true  bind(inBetweenF, _1, 7, 9)   (6) ;  //  call (6, 7, 9)  ->  true bind(inBetweenF, _1, _1, _1)(10, 11, 12) ;  //  call (10, 10, 10)  -> true bind(inBetweenF, _3, _4, _4)(11, 22, 33, 44) ; // call (33, 44, 44)  -> true } notice 4 arguments
The bind Function Template bool inBetweenF(int l, int m, int r) { return (l <= m && m <= r); } void test_bind() { array<int, 4> arr = {1, 2, 3, 4}; int k = 0; count_if(arr.begin(), arr.end(), bind(inBetweenF, 1, 2, _1), k);  // k set to 3 //  (1,2, 1 )f  (1,2, 2 )t  (1,2, 3 )t  (1,2, 4 )t //  1  2  3  4 count_if(arr.begin(), arr.end(), bind(inBetweenF, 1, _1, 4), k);  // k set to 4 //  (1, 1 ,4)t  (1, 2 ,4)t  (1, 3 ,4)t  (1, 4 ,4)t //  1  2  3  4 count_if(arr.begin(), arr.end(), bind(inBetweenF, 2, _1, 3), k);  // k set to 2 //  (2, 1 ,3)f  (2, 2 ,3)t  (2, 3 ,3)t  (2, 4 ,3)f //  1  2  3  4 count_if(arr.begin(), arr.end(), bind(inBetweenF, _1, 2, 4), k);  // k set to 2 //  ( 1 ,2,4)t  ( 2 ,2,4)t  (3,2,4)f  (4,2,4)f //  1  2  3  4 }
The bind Function Template class WW{ public: int w_; WW(int w = 0) : w_(w) {} bool amIinBetween(int l, int r)   {  return (l <= w_) && (w_ <= r);   } int operator () (int i) const  {   return i;  } typedef int result_type; }; class TT{ public: int t_; TT(int t  = 0) : t_(t) {} int foo(int i, int j) const {   return i + j + t_;   } }; void test_bind (void) { array<WW, 3> war = {1, 2, 3}; int k = 0; count_if(war.begin(), war.end(), bind(&WW::amIinBetween, _1, 2, 4), k);  // k set to 2  // testing bind chaining WW w1(11), w2(33); w1(3);  // call w1.op()(3) TT t1(13); bind(w1, 22)(); // equivalent to w1(22) bind(w2, bind(w1, 7)())(); // eq to w2(w1(7)) bind(&TT::foo, t1, 5, 6)(); // calls, 6) bind(&TT::foo, _3, _2, _1)(5, 6, t1); // calls 6, 5 ) // supposed to work but doesn't compile //bind(&TT::foo, t1, h, bind(w1, _1))(2); //bind(&TT::foo, t1, bind(w1, _2), bind(w2, _1))(27, 37); // call, w2(27)) } binding a non-static member function of class WW to fixed arguments 2 and 4 the first argument _1 is the object of type WW the member function is called on
The bind Function Template bool inBetweenF(int l, int m, int r) {   return (l <= m && m <= r);   } bool matchDelta(int l, int m, int r) {   return (m == abs(l - r));   } void test_function_bind() { array<int, 3> r0 = { 1, 2, 3}, r1 = {11, 2, 1}; array< array<int, 3>*, 2> a2d = {&r0, &r1}; array<function<bool (int, int, int)>, 2> af; // 2 element array of function objects (empty) af[0] = inBetweenF; af[1] = matchDelta; for(int t = 0; t < 2; ++t){ // iterate twice for(int row = 0; row < 2; ++row){ // iterate on each row int k; count_if(a2d[row]->begin(), a2d[row]->end(), bind(af[row], 2, _1, 4), k); } af[0].swap(af[1]); } } 1 2 3 11 2 1 a2d inBetweenF  matchDelta  2, 1 ,4 2, 2 ,4 2, 3 ,4 2, 1 ,4 2, 2 ,4 2, 3 ,4 inBetweenF  matchDelta  2, 11 ,4 2, 2 ,4 2, 1 ,4 2, 11 ,4 2, 2 ,4 2, 1 ,4
The bind Function Template function<bool (int, float, char, double)> f4p; function<bool (int, float, char)> f3p; function<bool (int, float)> f2p; function<bool (int)> f1p; f4p = fooo4; f4p(3, 5.5, 'w', 9.9); f3p = fooo3; f3p(3, 5.5, 'w'); f2p = fooo2; f2p(3, 5.5); f1p = fooo1; f1p(3); bind(f4p, _1, 7.7, _2, 9.9)(5, 'w'); bind(f4p, 5, 7.7, _2, _1)(9.8, 'w'); f3p = bind(f4p, _1, _2, _3, 11.11); f3p(3, 5.5, 'w'); f2p = bind(f4p, _1, _2, 'w', 13.13); f2p(5, 6.6); f2p = bind(f3p, _1, _2, 'w'); f2p(7, 8.8); f1p = bind(f4p, _1, 7.7, 'w', 9.9); f1p(5); f1p = bind(f3p, _1, 7.7, 'w'); f1p(5); f1p = bind(f2p, _1, 7.7); f1p(5); function <bool (double)> f1pd; f1pd = bind(f4p, 5, 7.7, 'w', _1); f1pd(9.9); bool fooo4(int i, float j, char c, double d)  {/* */} bool fooo3(int i, float j, char c) {/* */} bool fooo2(int i, float) {/* */} bool fooo1(int i) {/* */} bool fooo4( int i,  float j,  char c,  double d ) bind(f4p,  _1,  7.7,  _2,  9.9) (5, 'w'); _1  _2 i j c d
Performance C++ : inlining C++ code tends to have many many small functions (much more than C) constructors, destructors, operators, member access functions, etc. Function call overhead may dominate all your runtime without being aware of it Inlining is the first and most important runtime optimization you need to consider! class X{ public: X()  {} // default constructor X(int i) {} // constructor taking one int as argument ~X() {} // destructor X & operator = (const X & rs) {} // assignment bool operator == (const X & rs) const {} // comparison operator bool operator != (const X & rs) const {} // comparison operator X & operator ++ ( )  {} // pre-increment X  operator ++ (int) {} // post-increment // … }; int main(void) { for(X x1(0); x1 != X(100); x1++){ X temp[10];  // 1ct + 100*(1comp + 1ct + 1dt + 1ct + 1dt + 1post_incr +  //…  // 10ct + 1ass + 10dt)  =  2701  function calls temp[i] = x1; } return 0; } For this example, if you don’t have good inlining, your code will be very slow!
inlining – case study1 // declaration int myMax(int a, int b);  // definition int myMax(int a, int b)  { return a > b ? a : b; } long  foo(int v1, int v2) { long k = 0; for(int i = 0; i < v1; ++i) for(int j = 0; j < v2; ++j) k += myMax(i, j); return k; } int main(void) { foo(500000, 10000); return 0; } .globl _Z5 myMax ii .type _Z5myMaxii, @function _Z5myMaxii: .LFB2: .file 1 &quot;; .loc 1 8 0 pushq %rbp .LCFI0: movq %rsp, %rbp .LCFI1: movl %edi, -4(%rbp) movl %esi, -8(%rbp) .loc 1 10 0 movl -4(%rbp), %eax cmpl -8(%rbp), %eax jle .L2 movl -4(%rbp), %eax movl %eax, -12(%rbp) jmp .L3 .L2: movl -8(%rbp), %eax movl %eax, -12(%rbp) .L3: movl -12(%rbp), %eax .loc 1 11 0 leave ret g++ elapsed time: 32.85 0000000000000032 T _Z3fooii 0000000000000000 T _Z5myMaxii .globl _Z3 foo ii .type _Z3fooii, @function _Z3fooii: .LFB3: .loc 1 14 0 pushq %rbp .LCFI2: movq %rsp, %rbp .LCFI3: subq $24, %rsp .LCFI4: movl %edi, -20(%rbp) movl %esi, -24(%rbp) .LBB2: .loc 1 16 0 movq $0, -16(%rbp) .LBB3: .loc 1 17 0 movl $0, -8(%rbp) jmp .L6 .L9: .LBB4: .loc 1 18 0 movl $0, -4(%rbp) jmp .L7 .L8: .loc 1 19 0 movl -4(%rbp), %esi movl -8(%rbp), %edi call _Z5myMaxii cltq addq %rax, -16(%rbp) .loc 1 18 0 addl $1, -4(%rbp) .L7: movl -4(%rbp), %eax cmpl -24(%rbp), %eax jl .L8 .LBE4: .loc 1 17 0 addl $1, -8(%rbp) .L6: movl -8(%rbp), %eax cmpl -20(%rbp), %eax jl .L9 .LBE3: .loc 1 20 0 movq -16(%rbp), %rax .LBE2: .loc 1 22 0 leave ret myMax function body call to myMax
inlining – case study1 // declaration inline  int myMax(int a, int b);  // definition int myMax(int a, int b)  { return a > b ? a : b; } long  foo(int v1, int v2) { long k = 0; for(int i = 0; i < v1; ++i) for(int j = 0; j < v2; ++j) k += myMax(i, j); return k; } int main(void) { foo(500000, 10000); return 0; } .globl _Z5 myMax ii .type _Z5myMaxii, @function _Z5myMaxii: .LFB2: .file 1 &quot;; .loc 1 8 0 pushq %rbp .LCFI0: movq %rsp, %rbp .LCFI1: movl %edi, -4(%rbp) movl %esi, -8(%rbp) .loc 1 10 0 movl -4(%rbp), %eax cmpl -8(%rbp), %eax jle .L2 movl -4(%rbp), %eax movl %eax, -12(%rbp) jmp .L3 .L2: movl -8(%rbp), %eax movl %eax, -12(%rbp) .L3: movl -12(%rbp), %eax .loc 1 11 0 leave ret g++ elapsed time: 22.94 0000000000000020 T _Z3fooii 0000000000000000 T _Z5myMaxii _Z3fooii: .LFB3: .loc 1 14 0 pushq %rbp .LCFI2: movq %rsp, %rbp .LCFI3: movl %edi, -36(%rbp) movl %esi, -40(%rbp) .LBB7: .loc 1 16 0 movq $0, -24(%rbp) .LBB8: .loc 1 17 0 movl $0, -16(%rbp) jmp .L6 .L11: .LBB9: .loc 1 18 0 movl $0, -12(%rbp) jmp .L7 .L10: movl -16(%rbp), %eax movl %eax, -4(%rbp) movl -12(%rbp), %eax movl %eax, -8(%rbp) .LBB10: .LBB11: .loc 1 10 0 movl -4(%rbp), %eax cmpl -8(%rbp), %eax jle .L8 movl -4(%rbp), %eax movl %eax, -44(%rbp) jmp .L9 .L8: movl -8(%rbp), %eax movl %eax, -44(%rbp) .L9: movl -44(%rbp), %eax .LBE11: .LBE10: .loc 1 19 0 cltq addq %rax, -24(%rbp) .loc 1 18 0 addl $1, -12(%rbp) .L7: movl -12(%rbp), %eax cmpl -40(%rbp), %eax jl .L10 .LBE9: .loc 1 17 0 addl $1, -16(%rbp) .L6: movl -16(%rbp), %eax cmpl -36(%rbp), %eax jl .L11 .LBE8: .loc 1 20 0 movq -24(%rbp), %rax .LBE7: .loc 1 22 0 leave ret myMax function body body of myMax inserted into code no more call to myMax
inlining – case study1 // declaration inline  int myMax(int a, int b);  // definition int myMax(int a, int b)  { return a > b ? a : b; } long  foo(int v1, int v2) { long k = 0; for(int i = 0; i < v1; ++i) for(int j = 0; j < v2; ++j) k += myMax(i, j); return k; } int main(void) { foo(500000, 10000); return 0; } g++ -O2 elapsed time: 6.26 0000000000000000 T _Z3fooi .globl _Z3fooii .type _Z3fooii, @function _Z3fooii: .LFB3: .file 1 &quot;; .loc 1 14 0 .LVL0: .LBB7: .LBB8: .LBB9: .loc 1 18 0 xorl %r8d, %r8d .LVL1: .LBE9: .loc 1 17 0 xorl %ecx, %ecx .LVL2: testl %edi, %edi jle .L5 .LVL3: .p2align 4,,10 .p2align 3 .L3: xorl %edx, %edx .LVL4: .LBB10: .loc 1 18 0 testl %esi, %esi jle .L7 .p2align 4,,10 .p2align 3 .L9: .loc 1 19 0 cmpl %ecx, %edx movl %ecx, %eax cmovge %edx, %eax .loc 1 18 0 addl $1, %edx .LVL5: .loc 1 19 0 cltq addq %rax, %r8 .loc 1 18 0 cmpl %edx, %esi jg .L9 .L7: .LBE10: .loc 1 17 0 addl $1, %ecx .LVL6: cmpl %ecx, %edi jg .L3 .L5: .LBE8: .LBE7: .loc 1 22 0 movq %r8, %rax ret body of myMax inserted into code no more call to myMax myMax function doesn’t even show up in the symbol table
inline – comparing various options 6 11 22 22 32 32 Runtime No Yes Yes Yes Yes Yes does the object file contain the function? Yes No Yes Yes No No did compiler inline the function? No No Yes Yes No No __attribute__ ((always_inline)) compiler directive No Yes Yes No No No --no-inline compiler option Yes Yes No No Yes No inline keyword Yes Yes No  No No No -O2
inlining – case study2 // t.h #define II inline //#define II class X{ private: int i_; public: X(int i); ~X(); bool operator > (const X & rs) const; X & operator = (const X & rs); X & operator ++ (void); int i(void) const; }; II X::X(int i) : i_(i) {} II X::~X() {} II bool X::operator > (const X & rs) const { return  i_ > rs.i_; } II int X::i(void) const { return i_; } II X & X::operator ++(void)  { ++i_; return * this; } // declaration II int  myMax(X a, X b);  // definition int  myMax(X a, X b)  { return a > b ? a .i() : b.i(); } #ifdef II  #undef II #endif g++ -O2 nm t.o 0000000000000000 T _Z3foo1XS_ runtime  2.3 // #include &quot;t.h&quot; long  foo(X v1, X v2) { long k = 0; for(X i = 0; !(i > v1); ++i) for(X j = 0; !(j > v2); ++j) k += myMax(i, j); return k; } // #include &quot;t.h&quot; long foo(X v1, X v2); int main(void) { long res = foo(X(200000), X(10000)); return 0; } g++ nm t.o 00000000000000cd T _Z3foo1XS_ 000000000000008d T _Z5myMax1XS_ 0000000000000016 T _ZN1XC1Ei 0000000000000000 T _ZN1XC2Ei 0000000000000036 T _ZN1XD1Ev 000000000000002c T _ZN1XD2Ev 0000000000000070 T _ZN1XppEv 0000000000000060 T _ZNK1X1iEv 0000000000000040 T _ZNK1XgtERKS runtime 37.46 16x speedup!
inlining – case study 3 class Rectangle{ public: int xl, yl, xh, yh; }; void foo(Rectangle r1) { r1.xl = r1.yh; } .type  _Z3foo9Rectangle, @function _Z3foo9Rectangle: .LFB2: pushq  %rbp .LCFI0: movq  %rsp, %rbp .LCFI1: movq  %rdi, -16(%rbp) movq  %rsi, -8(%rbp) movl  -4(%rbp), %eax movl  %eax, -16(%rbp) leave ret stack pointer yh Return Address 0 -4 xh -8 yl -12 xl -16 The assignment in question is executed in 2 assembly instructions Very fast! As fast as C. eax register
inlining – case study 3 class D1D{ private: int  v_; // 0 - LOW, 1 - HIGH public: D1D(int i) : v_(i) {} operator int () { return v_;} }; class D2D{ private: int v_; // 0 WEST, 1 EAST, 2 SOUTH, 3 NORTH public: D2D(int v) : v_(v) {} operator D1D () { return D1D(v_ & 1); } bool isVertical() { return (v_ >> 1); } operator int (void) { return v_;} }; const D1D LOW(0), HIGH(1); const D2D WEST(0), EAST(1), SOUTH(2), NORTH(3); class Interval{ private: int v_[2]; public: int & get(D1D d) { return v_[d]; } Interval(int l, int h) { v_[0] = l; v_[1] = h; } }; class Rectangle{ private: Interval i_[2]; public: int & get(D2D d) { return i_[d.isVertical()].get(d); } };  void foo(Rectangle r1) { r1.get(WEST) = r1.get(NORTH); } isVertical Interval::get D1D::op int() D2D::op D1D() The foo () function can be made almost as fast as the one in the previous page, provided that all the functions inline. Otherwise, could be much slower!
inlining – migration f1 f2 10 f3 10 f4 4 f5 4 f6 6 small small small small big big f inlined function f out of line function problem (4 calls) f1 f2 10 f3 10 f4 4 f5 4 f6 6 small small small small big big problem (10 calls) // force f4 to be inlined inline void f4() __attribute__ ((always_inline)) the compiler was forced to inline f4, but in the process it gave up on trying to inline f3, so the code actually became slower!
inlining - summary ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
Code vs. Heap vs. Stack vs. Static Memory int a;  static int b; extern int x = 3; namespace BB{ int b; } class Z{ public: static int zz; int k; }; int Z::zz; extern int y; // no storage Z z1; void foo() {} void bar() {} int main(void) { int jj; static int s; Z z2; char * cp; cp = new char[100]; Z * zp; zp = new Z; } Static Memory a  0x6013c0 b  0x6013d4 x  0x601298 BB::b  0x6013c4 Z::zz  0x6013c8 zp->zz 0x6013c8 z1  0x6013cc s  0x6013d8 Code Memory foo  0x4009e0 bar  0x4009f0 main 0x400a30 Heap Memory *zp  0x602080 zp->k 0x602080 *cp  0x602010 Stack Memory jj 0x7fbfffeafc z2 0x7fbfffeaf0 cp 0x7fbfffeae8 zp 0x7fbfffeae0 the 4 memory segments can be placed in very different address spaces
Performance C++ : Caching int main(void) { list<int> s; vector<int> v; for(int i = 0; i < 50000000; ++i){ s.push_back(i); v.push_back(i); } long ss = 0, sv = 0; for(set<int>::iterator sit = s.begin(); sit != s.end(); ++sit){ ss += *sit; } for(int i = 0; i < v.size(); ++i){ sv2 += v[i]; } return 0; } next prev int next prev int next prev int next prev int next prev control list allocator (heap) of list, all list elements allocated one after each other begin middle end vector 0.54 sec 0.09 sec  6x faster vector is faster, elements are next to each other in memory, good cache performance list is slower (6x) because it needs an extra dereferencing. Elements are in the heap next to each other, but each element is bigger (2 extra pointers), which slows it down (more memory traffic, more cache misses)
Caching – effects on performance int main(void) { list<int> s, s2; vector<int> v; for(int i = 0; i < 50000000; ++i){ s.push_back(i); s2.push_back(i); // just to spread elements v.push_back(i); } long ss = 0, sv = 0; for(list<int>::iterator sit = s.begin(); sit != s.end(); ++sit){ ss += *sit; } for(vector<int>::iterator vit = v.begin(); vit != v.end(); ++vit ){ sv2 += *vit; } return 0; } next prev int next prev int next prev int next prev int next prev control list allocator (heap) of list, all list elements allocated one after each other begin middle end vector 1.08 sec (2x slower than before) 0.06 sec  (30% faster than before) vector is faster, elements are next to each other in memory, good cache performance In addition, now we’re using the iterator, which has a better cache performance than using an index into the vector (which btw. introduces an extra multiply) now the list is even slower (18x), because its elements are not following each other in the heap, there’s an element of list s2 wedged in-between, worsening the cache misses, making the list twice as slow as before
stl set vs. vector find comparison 2 6 1 4 3 5 vtemp vector numbers 1-M shuffled 6 4 3 2 1 5 v vector numbers 1-M shuffled (differently) 1 3 2 4 6 5 s set  numbers 1-M ordered on RB tree // linear search in vector using vector::find for(vector<X>::iterator vit = vtemp.begin(); vit != vtemp.end(); ++vit){ i = find(v.begin(), v.end(), *vit); } // linear search in vector using iteration begin -> end for(vector<X>::iterator vit = vtemp.begin(); vit != vtemp.end(); ++vit){  for(i = v.begin(); i != v.end(); ++i){ if(*i == *vit) break; } } // O(logn) search on set using set::find for(vector<X>::iterator vit = vtemp.begin(); vit != vtemp.end(); ++vit){ si = s.find(*vit); } This is an overload used by find() for the RAI case. 168  template<typename _RandomAccessIterator, typename _Tp> 169  _RandomAccessIterator 170  __find(_RandomAccessIterator __first, _RandomAccessIterator __last, 171  const _Tp& __val, random_access_iterator_tag) 172  { 173  typename iterator_traits<_RandomAccessIterator>::difference_type 174  __trip_count = (__last - __first) >> 2; 175  176  for (; __trip_count > 0; --__trip_count) 177  { 178  if (*__first == __val) 179  return __first; 180  ++__first; 181  182  if (*__first == __val) 183  return __first; 184  ++__first; 185  186  if (*__first == __val) 187  return __first; 188  ++__first; 189  190  if (*__first == __val) 191  return __first; 192  ++__first; 193  } 194  195  switch (__last - __first) 196  { 197  case 3: 198  if (*__first == __val) 199  return __first; 200  ++__first; 201  case 2: 202  if (*__first == __val) 203  return __first; 204  ++__first; 205  case 1: 206  if (*__first == __val) 207  return __first; 208  ++__first; 209  case 0: 210  default: 211  return __last; 212  } 213  } 214  Note: linear vector::find() is faster than logarithmic set::find() on M < 160 !! loop unrolling in vector::find makes a big difference!!
Caching – can you spot the problems? // Example 1 vector<int> vx, vy; // huge vectors, storing x and y coordinates class Rectangle{ public: int xl, yl, xh, yh; Rectangle(size_t xli, size_t yli, size_t xhi, size_t yhi)  : xl(vx[xli), yl(vy[yli]), xh(vx[xhi]), yh(vy[yhi]) {} } // example 2 vector<int> v; void foo() { vector<int> w; for( .. ) { temp = v[i] + w[i]; } } // example 3 void bar() { set<int> si; map<int, char> mi; for( .. ){ si.insert(i); mi.insert(make_pair(i, c)); } }
Caching – summary ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
Performance C++ : if-less code // median function implementation in STL inline const int & median( const int & a,  const int & b,  const int & c) { if (a < b) if (b < c) return b; else if (a < c) return c; else return a; else if (a < c) return a; else if (b < c) return c; else return b; } _Z6medianRKiS0_S0_: .LFB2: movl (%rdi), %ecx movl (%rsi), %eax cmpl %eax, %ecx jge .L2 movl (%rdx), %r8d cmpl %r8d, %eax jl .L3 cmpl %r8d, %ecx jge .L5 .L4: movq %rdx, %rsi .L3: movq %rsi, %rax .p2align 4,,1 .p2align 3 ret .p2align 4,,10 .p2align 3 .L2: movl (%rdx), %r8d cmpl %r8d, %ecx .p2align 4,,2 .p2align 3 jl .L5 cmpl %r8d, %eax .p2align 4,,2 .p2align 3 jl .L4 movq %rsi, %rax .p2align 4,,3 .p2align 3 ret .p2align 4,,10 .p2align 3 .L5: movq %rdi, %rsi movq %rsi, %rax .p2align 4,,4 .p2align 3 ret
if-less code const int & median( const int & a,  const int & b, const int & c) { const bool alb = a < b; const bool blc = b < c; const bool alc = a < c; const int *  input[3] = {&a,&b,&c}; unsigned int index = 0;  index += (alb & blc) | (!alb & !blc); index += (unsigned int)((alc & !blc) |  (!alc & blc)) << 1;  return *(input[index]); } __Z6medianRKiS0_S0_: .LFB2: movl (%rsi), %ecx movl (%rdi), %r8d movl (%rdx), %eax movq %rsi, -32(%rsp) movq %rdx, -24(%rsp) movq %rdi, -40(%rsp) cmpl %ecx, %r8d setl %r9b cmpl %eax, %ecx setl %cl cmpl %eax, %r8d movl %r9d, %edx setl %r8b movzbl %cl, %esi xorl $1, %ecx movl %r8d, %eax movzbl %cl, %ecx xorl $1, %edx xorl $1, %eax andl %ecx, %r8d andl %edx, %ecx andl %esi, %eax andl %r9d, %esi orl %r8d, %eax orl %esi, %ecx leal (%rcx,%rax,2), %eax mov %eax, %eax movq -40(%rsp,%rax,8), %rax movl (%rax), %eax ret straight code, no ifs, no conditional jumps, faster!
Runtime comparison between stl median and ifless median unsigned int M = 100000000; vector<int> v; for(int i = 0; i < M; ++i){ v.push_back(rand()); } for(size_t i = 2; i < M; ++i){ ss1 += stl_median(v[i-2], v[i-1], v[i]); } for(size_t i = 2; i < M; ++i){ ss1 += median(v[i-2], v[i-1], v[i]); } for(size_t i = 2; i < M; ++i){ ss1 += stl_median(v[i-2], v[i-1], v[i++]); … ss1 += stl_median(v[i-2], v[i-1], v[i++]); // 8 – way loop unrolling  } for(size_t i = 2; i < M; ++i){ ss1 += median(v[i-2], v[i-1], v[i++]); … ss1 += median(v[i-2], v[i-1], v[i++]); // 8 – way loop unrolling  } ts = total time in for loop for  stl median ts tg = total time in for loop for  stl median ts ts8 = total time in for loop for  stl median ts tg8 = total time in for loop for  stl median ts ts/tg = 0.82 ts8/tg8 = 0.66 median + O stl_median + O = 0.82 median + O/8 stl_median + O/8 = 0.66 median/stl_median = 0.63  ifless median is 37% faster than stl median !
Performance – summary ,[object Object],[object Object],[object Object],[object Object]

More Related Content

What's hot

Paradigmas de Linguagens de Programacao - Aula #5
Paradigmas de Linguagens de Programacao - Aula #5Paradigmas de Linguagens de Programacao - Aula #5
Paradigmas de Linguagens de Programacao - Aula #5
Ismar Silveira
C Programming Tutorial -
C Programming Tutorial - www.infomtec.comC Programming Tutorial -
C Programming Tutorial -
M-TEC Computer Education
basics of C and c++ by eteaching
basics of C and c++ by eteachingbasics of C and c++ by eteaching
basics of C and c++ by eteaching
C++ presentation
C++ presentationC++ presentation
C++ presentation
C++ Programming Language
C++ Programming Language C++ Programming Language
C++ Programming Language
Mohamed Loey
Unit ii ppt
Unit ii pptUnit ii ppt
C++11: Feel the New Language
C++11: Feel the New LanguageC++11: Feel the New Language
C++11: Feel the New Languagemspline
Getting Started with C++
Getting Started with C++Getting Started with C++
Getting Started with C++
Praveen M Jigajinni
Virtual function in C++ Pure Virtual Function
Virtual function in C++ Pure Virtual Function Virtual function in C++ Pure Virtual Function
Virtual function in C++ Pure Virtual Function
Kamlesh Makvana
C fundamentals
C fundamentalsC fundamentals
(2) c sharp introduction_basics_part_i
(2) c sharp introduction_basics_part_i(2) c sharp introduction_basics_part_i
(2) c sharp introduction_basics_part_i
Nico Ludwig
What is c
What is cWhat is c
What is c
Nitesh Saitwal
C++ Language
C++ LanguageC++ Language
C++ Language
Presentation on C++ Programming Language
Presentation on C++ Programming LanguagePresentation on C++ Programming Language
Presentation on C++ Programming Language
C++ How to program
C++ How to programC++ How to program
C++ How to program
Mohammad Golyani
Basics of c++ Programming Language
Basics of c++ Programming LanguageBasics of c++ Programming Language
Basics of c++ Programming Language
Ahmad Idrees
Mesics lecture 5 input – output in ‘c’
Mesics lecture 5   input – output in ‘c’Mesics lecture 5   input – output in ‘c’
Mesics lecture 5 input – output in ‘c’eShikshak

What's hot (20)

Paradigmas de Linguagens de Programacao - Aula #5
Paradigmas de Linguagens de Programacao - Aula #5Paradigmas de Linguagens de Programacao - Aula #5
Paradigmas de Linguagens de Programacao - Aula #5
C Programming Tutorial -
C Programming Tutorial - www.infomtec.comC Programming Tutorial -
C Programming Tutorial -
basics of C and c++ by eteaching
basics of C and c++ by eteachingbasics of C and c++ by eteaching
basics of C and c++ by eteaching
Intro to c++
Intro to c++Intro to c++
Intro to c++
C++ presentation
C++ presentationC++ presentation
C++ presentation
C++ Programming Language
C++ Programming Language C++ Programming Language
C++ Programming Language
Unit ii ppt
Unit ii pptUnit ii ppt
Unit ii ppt
C++11: Feel the New Language
C++11: Feel the New LanguageC++11: Feel the New Language
C++11: Feel the New Language
Getting Started with C++
Getting Started with C++Getting Started with C++
Getting Started with C++
Virtual function in C++ Pure Virtual Function
Virtual function in C++ Pure Virtual Function Virtual function in C++ Pure Virtual Function
Virtual function in C++ Pure Virtual Function
C fundamentals
C fundamentalsC fundamentals
C fundamentals
(2) c sharp introduction_basics_part_i
(2) c sharp introduction_basics_part_i(2) c sharp introduction_basics_part_i
(2) c sharp introduction_basics_part_i
What is c
What is cWhat is c
What is c
C++ Language
C++ LanguageC++ Language
C++ Language
Presentation on C++ Programming Language
Presentation on C++ Programming LanguagePresentation on C++ Programming Language
Presentation on C++ Programming Language
C++ How to program
C++ How to programC++ How to program
C++ How to program
Lập trình C
Lập trình CLập trình C
Lập trình C
Basics of c++ Programming Language
Basics of c++ Programming LanguageBasics of c++ Programming Language
Basics of c++ Programming Language
Mesics lecture 5 input – output in ‘c’
Mesics lecture 5   input – output in ‘c’Mesics lecture 5   input – output in ‘c’
Mesics lecture 5 input – output in ‘c’

Viewers also liked

Advanced CPP Lecture 2- Summer School 2014 - ACA CSE IITK
Advanced CPP Lecture 2- Summer School 2014 - ACA CSE IITKAdvanced CPP Lecture 2- Summer School 2014 - ACA CSE IITK
Advanced CPP Lecture 2- Summer School 2014 - ACA CSE IITKPankaj Prateek
Idiomatic C++
Idiomatic C++Idiomatic C++
Idiomatic C++
Federico Ficarelli
The Style of C++ 11
The Style of C++ 11The Style of C++ 11
The Style of C++ 11
Sasha Goldshtein
Distributed Systems Design
Distributed Systems DesignDistributed Systems Design
Distributed Systems Design
Dennis van der Stelt
Operator overloading
Operator overloadingOperator overloading
Operator overloadingfarhan amjad
Improving The Quality of Existing Software
Improving The Quality of Existing SoftwareImproving The Quality of Existing Software
Improving The Quality of Existing Software
Steven Smith
Web Service Basics and NWS Setup
Web Service  Basics and NWS SetupWeb Service  Basics and NWS Setup
Web Service Basics and NWS Setup
Northeastern University
Bjarne Stroustrup - The Essence of C++: With Examples in C++84, C++98, C++11,...
Bjarne Stroustrup - The Essence of C++: With Examples in C++84, C++98, C++11,...Bjarne Stroustrup - The Essence of C++: With Examples in C++84, C++98, C++11,...
Bjarne Stroustrup - The Essence of C++: With Examples in C++84, C++98, C++11,...
Complement Verb
Operator overloading
Operator overloading Operator overloading
Operator overloading
Northeastern University
An Introduction to Part of C++ STL
An Introduction to Part of C++ STLAn Introduction to Part of C++ STL
An Introduction to Part of C++ STL
乐群 陈
Solid principles of oo design
Solid principles of oo designSolid principles of oo design
Solid principles of oo design
SOLID Principles part 2
SOLID Principles part 2SOLID Principles part 2
SOLID Principles part 2
Dennis van der Stelt
Programming In C++
Programming In C++ Programming In C++
Programming In C++
shammi mehra
SOLID Principles part 1
SOLID Principles part 1SOLID Principles part 1
SOLID Principles part 1
Dennis van der Stelt
Exception handling and templates
Exception handling and templatesException handling and templates
Exception handling and templatesfarhan amjad
Inheritance, polymorphisam, abstract classes and composition)
Inheritance, polymorphisam, abstract classes and composition)Inheritance, polymorphisam, abstract classes and composition)
Inheritance, polymorphisam, abstract classes and composition)
farhan amjad
Memory Management In C++
Memory Management In C++Memory Management In C++
Memory Management In C++
ShriKant Vashishtha
Building Embedded Linux
Building Embedded LinuxBuilding Embedded Linux
Building Embedded LinuxSherif Mousa

Viewers also liked (20)

03 dp
03 dp03 dp
03 dp
Advanced CPP Lecture 2- Summer School 2014 - ACA CSE IITK
Advanced CPP Lecture 2- Summer School 2014 - ACA CSE IITKAdvanced CPP Lecture 2- Summer School 2014 - ACA CSE IITK
Advanced CPP Lecture 2- Summer School 2014 - ACA CSE IITK
Idiomatic C++
Idiomatic C++Idiomatic C++
Idiomatic C++
The Style of C++ 11
The Style of C++ 11The Style of C++ 11
The Style of C++ 11
Distributed Systems Design
Distributed Systems DesignDistributed Systems Design
Distributed Systems Design
Operator overloading
Operator overloadingOperator overloading
Operator overloading
Improving The Quality of Existing Software
Improving The Quality of Existing SoftwareImproving The Quality of Existing Software
Improving The Quality of Existing Software
Web Service Basics and NWS Setup
Web Service  Basics and NWS SetupWeb Service  Basics and NWS Setup
Web Service Basics and NWS Setup
Bjarne Stroustrup - The Essence of C++: With Examples in C++84, C++98, C++11,...
Bjarne Stroustrup - The Essence of C++: With Examples in C++84, C++98, C++11,...Bjarne Stroustrup - The Essence of C++: With Examples in C++84, C++98, C++11,...
Bjarne Stroustrup - The Essence of C++: With Examples in C++84, C++98, C++11,...
Operator overloading
Operator overloading Operator overloading
Operator overloading
An Introduction to Part of C++ STL
An Introduction to Part of C++ STLAn Introduction to Part of C++ STL
An Introduction to Part of C++ STL
Solid principles of oo design
Solid principles of oo designSolid principles of oo design
Solid principles of oo design
SOLID Principles part 2
SOLID Principles part 2SOLID Principles part 2
SOLID Principles part 2
Programming In C++
Programming In C++ Programming In C++
Programming In C++
SOLID Principles part 1
SOLID Principles part 1SOLID Principles part 1
SOLID Principles part 1
Exception handling and templates
Exception handling and templatesException handling and templates
Exception handling and templates
Inheritance, polymorphisam, abstract classes and composition)
Inheritance, polymorphisam, abstract classes and composition)Inheritance, polymorphisam, abstract classes and composition)
Inheritance, polymorphisam, abstract classes and composition)
Memory Management In C++
Memory Management In C++Memory Management In C++
Memory Management In C++
Building Embedded Linux
Building Embedded LinuxBuilding Embedded Linux
Building Embedded Linux

Similar to C++ Advanced

Functions And Header Files In C++ | Bjarne stroustrup
Functions And Header Files In C++ | Bjarne stroustrupFunctions And Header Files In C++ | Bjarne stroustrup
Functions And Header Files In C++ | Bjarne stroustrup
Programming in C Basics
Programming in C BasicsProgramming in C Basics
Programming in C Basics
Bharat Kalia
function in in thi pdf you will learn what is fu...
function in  in thi pdf you will learn   what                           is fu...function in  in thi pdf you will learn   what                           is fu...
function in in thi pdf you will learn what is fu...
C programming
C programmingC programming
C programming
Karthikeyan A K
About Go
About GoAbout Go
About Go
Jongmin Kim
Php Reusing Code And Writing Functions
Php Reusing Code And Writing FunctionsPhp Reusing Code And Writing Functions
Php Reusing Code And Writing Functionsmussawir20
An imperative study of c
An imperative study of cAn imperative study of c
An imperative study of c
Tushar B Kute
Function recap
Function recapFunction recap
Function recapalish sha
Function recap
Function recapFunction recap
Function recapalish sha
C programming session 01
C programming session 01C programming session 01
C programming session 01Dushmanta Nath
C++ Function
C++ FunctionC++ Function
C++ FunctionHajar
Practical Meta Programming
Practical Meta ProgrammingPractical Meta Programming
Practical Meta ProgrammingReggie Meisler
Hooking signals and dumping the callstack
Hooking signals and dumping the callstackHooking signals and dumping the callstack
Hooking signals and dumping the callstack
Thierry Gayet
Modern c++ (C++ 11/14)
Modern c++ (C++ 11/14)Modern c++ (C++ 11/14)
Modern c++ (C++ 11/14)
Geeks Anonymes
Falcon初印象勇浩 赖
46630497 fun-pointer-1
46630497 fun-pointer-146630497 fun-pointer-1
46630497 fun-pointer-1
AmIt Prasad

Similar to C++ Advanced (20)

Functions And Header Files In C++ | Bjarne stroustrup
Functions And Header Files In C++ | Bjarne stroustrupFunctions And Header Files In C++ | Bjarne stroustrup
Functions And Header Files In C++ | Bjarne stroustrup
Programming in C Basics
Programming in C BasicsProgramming in C Basics
Programming in C Basics
function in in thi pdf you will learn what is fu...
function in  in thi pdf you will learn   what                           is fu...function in  in thi pdf you will learn   what                           is fu...
function in in thi pdf you will learn what is fu...
C programming
C programmingC programming
C programming
About Go
About GoAbout Go
About Go
Php Reusing Code And Writing Functions
Php Reusing Code And Writing FunctionsPhp Reusing Code And Writing Functions
Php Reusing Code And Writing Functions
An imperative study of c
An imperative study of cAn imperative study of c
An imperative study of c
Function recap
Function recapFunction recap
Function recap
Function recap
Function recapFunction recap
Function recap
C programming session 01
C programming session 01C programming session 01
C programming session 01
C++ Function
C++ FunctionC++ Function
C++ Function
Practical Meta Programming
Practical Meta ProgrammingPractical Meta Programming
Practical Meta Programming
Hooking signals and dumping the callstack
Hooking signals and dumping the callstackHooking signals and dumping the callstack
Hooking signals and dumping the callstack
Modern c++ (C++ 11/14)
Modern c++ (C++ 11/14)Modern c++ (C++ 11/14)
Modern c++ (C++ 11/14)
Antlr V3
Antlr V3Antlr V3
Antlr V3
46630497 fun-pointer-1
46630497 fun-pointer-146630497 fun-pointer-1
46630497 fun-pointer-1

C++ Advanced

  • 1. C++0X Standard C++ Standard Library Extensions Technical Report 1 (TR1) Advanced C++ Runtime Improvement Techniques Gyuszi Suto November 2009
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7. Lambda [] (int x) -> int { return x; } (10); capture clause empty – no local variables can be accessed = local variables accessed by value, not lvalue & local variables accessed by reference, lvalue formal parameters to lambda function specifies return type (not req’d if the entire function body is contained within one return statement) body of lambda function actual argument(s) passed to lambda function
  • 8. Lambda int main(void) { // lambda function, no access to local variables auto lambdaf3 = [] (int i) { return i+3; }; cout << lambdaf3(4) << &quot;&quot;; // prints 7 // accesses local variable by reference (and modifies it) int localv = 9; auto lambdaf4 = [&] (int i) { return localv = i; }; cout << lambdaf4(7) << &quot;, &quot; << localv << endl; // modifies localv, prints 7, 7 int localv2 = 11; auto lambdaf5 = [] (int i) { return i + localv2; }; // error, don't know what localv2 is cout << lambdaf5(4) << endl; int localv3 = 13; auto lambdaf6 = [=] (int i) { return i + localv3; }; // passing local context by value cout << lambdaf6(5) << endl; // prints 18 }
  • 9. Lambda int main(void) { int localv3 = 13; auto lambdaf7 = [=] (int i) { return localv3 += i; }; // error, localv3 is not a modifiable l value int a = 3, b = 4; auto lambdaf8 = [=, &b] (int i) { return 3 + (b += (a + i)); }; cout << lambdaf8(3) << &quot;, &quot; << b << endl; // accesses a by value, b by ref, prints 13, 10 array<int, 5> ia = { 9, -3, 2, 13, -7}; // defines and uses a lambda in the context of a standard sort std::sort(ia.begin(), ia.end(), [] (int a, int b) { return std::abs(a) < std::abs(b); } ); print_collection(ia); // prints 2 -3 -7 9 13 }
  • 10. Lambda int pivot = 5; // global variable int main(void) { array<int, 5> ia = { 9, -3, 2, 13, -7}; // defines a named lambda function, used later inside sort auto lambdaf1 = [] (int a, int b) { return std::abs(a) < std::abs(b); }; std::sort(ia.begin(), ia.end(), lambdaf1 ); print_collection(ia); // prints 2 -3 -7 9 13 // pivot global in this case std::sort(ia.begin(), ia.end(), [] (int a, int b) { return std::abs(a - pivot) < std::abs(b - pivot); } ); print_collection(ia); // prints 2 9 -3 13 -7 int pivot2 = 8; // pivot2 local variable captured by value std::sort(ia.begin(), ia.end(), [pivot2] (int a, int b) { return std::abs(a - pivot2) < std::abs(b - pivot2); } ); print_collection(ia); // prints 9 13 2 -3 -7 return 0; }
  • 11. Lambda class X{ public: int a, b; // class member data // constructor X(int aa = 0, int bb = 2) : a(aa), b(bb) {} void memfun1 (void){ int c = 2, d = 3; // local variables // lambda function definition and call [&, d] (int k) -> void { cout << a << &quot;, &quot; << b << &quot;, &quot; << c << &quot;, &quot; << d << &quot;, &quot; << (c+=k) << endl; return; } (7); } void memfun2(void) { // named lambda function, accesses this auto lambda12 = [this] (int k) { return this + k; }; cout << lambda12(0xFF) << endl; // named lambda function, tries to access by reference, compiler warning ! auto lambda13 = [&this] (int k) -> X * { a += 3; return this + k; }; cout << lambda13(0xEE) << endl; } } int main(void) { X x1; cout << &quot;in X&quot; << endl; x1.memfun1(); // prints 0, 2, 2, 3, 9 cout << x1.a << &quot; before the call&quot; << endl; // prints 0 x1.memfun2(); // prints 0x7fbfffcd00 cout << x1.a << &quot; after the call&quot; << endl; // prints 3 }
  • 12.
  • 13. Un-named lambda, named lambda and function void foo() { int i, j; // local variables … need a function here, called once no other place needs it make it unnamed lambda it will know the context i, j … need a function here that knows the context i, j, and it will be called here and in other locations of this function make it a named lambda … need to call a function here this function may be called from other functions and it does not know about the context of this function foo() so make it an outside function xx() and call it here } // this function does not know about // the stack frame of the caller // if the caller needs to pass its context, then // it needs to send it in via function arguments void xx() { int k, l, m; … }
  • 14. Template Traits template <class T> class MMAX { public: enum {mmax = 10 }; }; template<> class MMAX<int> { public: enum {mmax = 1000 }; }; template <> class MMAX<float> { public: enum {mmax = 20000 }; }; template <class V, class TRAIT = MMAX<V> > class Luba{ public: V v[TRAIT::mmax]; }; int main(void) { Luba<int, MMAX<int> > myLuba; Luba<float> myLuba2; return 0; } template trait template default type argument relying on mmax enum of TRAIT Type V and type TRAIT are decoupled
  • 15. Template Traits cont’d const char * table[] = { “ Unknown”, // 0 “ Int”, // 1 “ Float”, // 2 “ X” // 3 }; template <class T> class Trait { public: static const int index = 0; }; template<> class Trait<int> { public: static const int index = 1; }; template<> class Trait<X> { public: static const int index = 3; }; template<class T, class R = Trait<T> > class W { public: T val; static const char * name () { return table[R::index] ; } }; … cout << W<int>::name(); // prints “Int”, relies on default argument of Trait<int> cout << W<X, Trait<X> >::name(); // prints “X” cout << W<char, Trait<char> >::name(); // prints “Unknown” cout << W<double, Trait<int> >::name(); // prints “Int” !! Template specialization Generic Template
  • 16. Template Metaprogramming template<int N> class Factorial { public: enum { value = N * Factorial<N-1>::value }; }; class Factorial<1> { // template specialization for 1 public: enum { value = 1 }; }; … // evaluated at compile-time std::cout << Factorial<3>::value ; // prints 6 int a[Factorial<4>::value]; // 24 template <unsigned long N> struct binary { static unsigned const value = binary<N/10>::value * 2 // prepend higher bits + N%10; // to lowest bit }; template <> // specialization struct struct binary<0> // terminates recursion { static unsigned const value = 0; }; int b[binary<1101>::value]; // compile-time evaluation to 13 Done at compile - time
  • 17. Variadic Templates #include <iostream> #include <typeinfo> template<int size, int... ints> // packing ints class X{ public: void foo(void) { int x[size] = {ints...}; // unpacking ints for(int i = 0; i < size; ++i){ std::cout << x[i] << &quot; &quot;; } std::cout << &quot;&quot;; } }; int main(void) { X<3, 8, 9, 10> x1;; // prints 8 9 10 X<4, 11, 12, 13, 14> x2;; // prints 11 12 13 14 return 0; }
  • 18. Variadic Templates void myprintf(const char * s) {} // no-op template<typename T, typename... Args> void myprintf(const char* s, T value, Args... args) // args are packed { while (*s) { if (*s == '%' && *(++s) != '%') { std::cout << value; // args are unpacked below myprintf(s, args...); // call even when *s == 0 to detect extra arguments return; } std::cout << *s++; } } #if 0 %nm a.out | grep myprintf 00000000004009cb t _GLOBAL__I__Z8myprintfPKc 0000000000400b0c W _Z8myprintfIdIEEvPKcT_DpT0_ 0000000000400a7a W _Z8myprintfIfIdEEvPKcT_DpT0_ 00000000004009e0 W _Z8myprintfIiIfdEEvPKcT_DpT0_ 00000000004008ec T _Z8myprintfPKc #endif int main(void) { int i = 5; float f = 6.6; double d = 9.9; myprintf(&quot;i=% f=% d=%&quot;, i, f, d); // prints i=5 f=6.6 d=9.9 myprintf(&quot;i=% f=%&quot;, i, f, d); // prints i=5 f=6.6 myprintf(&quot;i=% f=% d=% d=%&quot;, i, f, d); // prints i=5 f=6.6 d=9.9 return 0; }
  • 19. Variadic Templates class X { int i; }; class Y { float f; }; class Z { double d; }; // forward declaration template <typename... E> struct TI; // template specialization for no type argument template <> struct TI<> { }; // general (and recursive) template class definition template <typename H, typename... T> // packing type T struct TI<H, T...> : public H, public TI<T...> // unpacking type T {}; int main(void) { cout << sizeof(TI<X, Y, Z>) << endl; // prints 24 cout << sizeof(TI<vector<int>, X, Y>) << endl; // prints 32 return 0; } vector<int> X Y multiple inheritance This is an example of the building block of tuples (TupleImplement TI) using variadic templates. The example is just a small piece of the actual tuple implementation.
  • 20. tuple<T0, T1, T2, .., T9> the std::pair<T1, T2> on steroids pair<int, float> p(3, 9.9); cout << p.first << endl; // print 3 pair<int, pair<float, double> > pxx(5, make_pair(8.8, 11.11)); cout << pxx.second.first << endl; // print 8.8 tuple<int> ti(2); cout << get<0>(ti) << endl; // print 2 tuple<int, float, char> tifc(3, 9.9, 'c'); cout << get<2>(tifc) << endl; // print 'c' tuple<int, tuple<char, char, char>, float, int> txxx(3, make_tuple('c', 'd', 'e'), 13.33, 7); cout << get<2>(get<1>(txxx)) << endl; // print 'e' int char char float int char
  • 21. tuple constructing The tuple constructor takes the tuple elements as arguments. For an n-element tuple, the constructor can be invoked with k arguments, where 0 <= k <= 9. For example: tuple<> t; // sizeof == 2 tuple <char> tc; // sizeof == 3 tuple <char, char> tcc; // sizeof == 3 tuple<int, int> t2; // sizeof == 12 tuple<int, int, int, int, int, int, int, int, int> t9; // sizeof == 40 If no initial value for an element is provided, it is default initialized (and hence must be default initializable). For example. class X{ public: X::X(const string & s); // the only ct for X }; tuple<X, X, X> t4; // error, no default ct for X tuple<X, X, X> t5(string(&quot;Jaba&quot;), string(&quot;Daba&quot;), string(&quot;Duu&quot;)); // ok
  • 22. tuple constructing References must be constructed explicitly, for example: tuple<double &> t(4.2); // error, cannot refer to a temporary value tuple<const double &> tdcr(8.0); // OK const tuple<double &> ctdr(8.0); // Error double d = 3.8; tuple<double &> t(d); // OK cout << typeid(t).name(); // prints: class std::tr1::tuple<double &, Nil, Nil, Nil, Nil, Nil, Nil, Nil, Nil> tuple<double &> t2(d + 0.2); // error cannot initialize non-const reference with a temporary tuple<const double &> t3(d + 0.3); // ok
  • 23. make_tuple make_tuple – a more convenient way to create tuples no need to specify the types types are deduced to the plain, value-based, non-reference type int i = 3; float f = 5.5; make_tuple(i, f); // makes a tuple<int, float > make_tuple(8, 9.9); // makes a tuple<int, double> tuple<int, int, double> add_multiply_divide(int a, int b) { return make_tuple(a+b, a*b, double(a)/double(b)); // results in tuple<int, int, double> } class A{}; class B{}; void foo(const A & a, B & b) { make_tuple(a, b); // results in tuple< class A, class B> - types reduced to plain, non-ref type }
  • 24. make_tuple, ref, cref // ref and cref are reference wrappers from <functional> header file - see later A a; B b; const A ca = a; make_tuple(cref(a), b); // creates tuple<const A&, B> make_tuple(ref(a), b); // creates tuple<A&, B> make_tuple(ref(a), cref(b)); // creates tuple<A&, const B&> make_tuple(cref(ca)); // creates tuple<const A&> make_tuple(ref(ca)); // creates tuple<const A&> char aa, bb; make_tuple(aa, bb); // creates tuple<char, char> sizeof == 3 make_tuple(ref(aa), ref(bb)); // creates tuple<char &, char &> sizeof == 12 int iii = 22, jjj = 100; cout << (get<0>(make_tuple(ref(iii), ref(jjj))) = 66) << " iii " << iii << ""; // creates a tuple<int &, int &> // first element refers to iii // prints 66 iii 66 int & int &
  • 25.
  • 26. ties - tuples Ties are tuples, where all elements are of non-const reference types. int i; char c; double d; ... tie(i, c, d); The above tie function creates a tuple of type tuple<int&, char&, double&>. The same result could be achieved with the call make_tuple(ref(i), ref(c), ref(d)); A tuple that contains non-const references as elements can be used to 'unpack' another tuple into variables. e.g.: char c; double d; int i; tie(i, c, d) = make_tuple(1,'a', 5.5); std::cout << i << " " << c << " " << d; // prints 1 ‘a’ 5.5 d 0x8000 c 0x8004 i 0x8008 int & 0x8008 char & 0x8004 double & 0x8000 tie(i, c, d) = make_tuple(1,'a', 5.5); int 1 char ‘a’ double 5.5 temporary (lvalue) temporary stack variables non-temporary
  • 27. ties - tuples Ignore There is also an object called ignore which allows you to ignore an element assigned by a tuple. The idea is that a function may return a tuple, only part of which you are interested in. For example int i; char c = ‘x’ ; double d; tuple<int, char, double> tup(2, 'b', 6.6); tie(i, ignore, d) = tup; cout << i << &quot; &quot; << c << &quot; &quot; << d; // prints 2 ‘x’ 6.6 d 0x8000 c 0x8004 i 0x8008 int & 0x8008 double & 0x8000 tie(i, ignore, d) = tup; int 2 char ‘b’ double 6.6 temporary (lvalue) stack variables non-temporary tup 0x800C stack variable // tying to a std::pair int i; char c = ‘x’ ; pair<int, char> pp(33, ‘w’); tie(i, c) = pp; // doesn’t compile (??) tie(i, c) = tuple<int, char>(pp); // works OK
  • 28. tuples - performance All tuple access and construction functions are small inlined one-liners. Therefore, a decent compiler can eliminate any extra cost of using tuples compared to using hand written tuple like classes. Particularly, with a decent compiler there is no performance difference between this code: class hand_made_tuple { A a; B b; C c; public: hand_made_tuple(const A& aa, const B& bb, const C& cc) : a(aa), b(bb), c(cc) {}; A& getA() { return a; }; B& getB() { return b; }; C& getC() { return c; }; }; hand_made_tuple hmt(A(), B(), C()); hmt.getA(); hmt.getB(); hmt.getC(); and this code: tuple<A, B, C> t(A(), B(), C()); t.get<0>(); t.get<1>(); t.get<2>(); There’s a memory overhead of 1-3 bytes per tuple – may be compiler and/or optimization level dependent Compiler error messages are very hard to understand!
  • 29.
  • 30. Usage of - and Issues with - auto_ptr<T> class X{…}; int main(void) { const auto_ptr<X> cap(new X(4)); auto_ptr<X> bb = cap; // compiler error, no copy semantics from const vector<auto_ptr<X> > xv; try{ auto_ptr<X> ap1(new X(3)); // ap1 points to an X xv.push_back(ap1); // compile error: no copy constructor for auto ptr { auto_ptr<X> ap3, ap4; // both point to null ap3 = ap1; // ap1.release() is called internally, ap3 points to X ++ap3; // compiler error, cannot increment auto pointer ap1->x = 7; // this throws !! ap4 = ap3; // ap3.release(), ap4 points to X X & xr = *ap4; // both ap4 and xr refer to same X X * xp = ap4; // compiler error X * xp2 = ap4.get(); // OK, uses auto_ptr.get() member function to get an X* } // ap4 goes out of scope, X is destroyed ap1->x = 3; // throws } catch (...) { cout << &quot;caught throw &quot;; } return 0; }
  • 31. shared_ptr shared_ptr<X> p0; auto_ptr<X> ap = new X; p0.unique() -> false (garbage?) p0.use_count() -> 0 (garbage?) p0 -> false p0 ap X 1 p0 = ap; p0.unique() -> true p0.use_count() -> 1 p0 -> true p0 ap X 2 shared_ptr<X> p1 = p0; X * xp = p0.get(); p0.unique() -> false p0.use_count() -> 2 xp not counted p0 -> true p0 p1 X 3 shared_ptr<X> p2 = p1; p0.unique() -> false p0.use_count() -> 3 p0 -> true p0 p1 X 4 p2 p0.reset(); p1.unique() -> false p1.use_count() -> 2 p1 -> true p0 p1 X 5 p2 p2.reset(new X); p1.unique() -> true p1.use_count() -> 1 p1 -> true p0 p1 X 6 p2 X p1.swap(p2); p1.unique() -> true p1.use_count() -> 1 p1 -> true p0 p1 X 7 p2 X p2 = p1; p1.unique() -> false p1.use_count() -> 2 p1 -> true p0 p1 X 8 p2 X destructor call // p0, p1 and p2 go out of // scope // p0 destructor // p1 destructor // X destructor // finally p2 destructor p0 p1 9 p2 X destructor call xp
  • 32. shared_ptr convertible pointers shared_ptr<X> p0(new X(33, 44)); p0.unique() -> true p0.use_count() -> 1 p0 X 1 shared_ptr<void> vp(p0); // X* is convertible to void* p0.use_count() -> 2 vp.use_count() -> 2 p0 vp X 2 p0.reset(); p0.use_count() -> 0 vp.use_count() -> 1 p0 vp X 3 vp.reset(); // vp knows what destructor to call // b/c the s_p<T> constructor is // templated on the argument (2 templates) p0.use_count() -> 0 vp.use_count() -> 0 p0 vp X 4 // X destructor shared_ptr<void> vp2(new X(1, 2)); vp2.use_count() -> 1 vp2 X 5 shared_ptr<void> vp3(vp2); vp2.use_count() -> 2 vp2 X 6 vp3 shared_ptr<X> p4(vp2); // Error vp2 has no type knowledge to allow construction of p4 vp2 X 7 vp3 p4 shared_ptr<X> p5(*(reinterpret_cast<shared_ptr<X> *>(&vp2))); // OK,ugly vp2 X 8 vp3 p5 vp2.use_count() -> 3
  • 33. shared_ptr X _Ptr _Rep _Ref_cnt_base 4bytes 4bytes 16 bytes _Ptr _Rep 4bytes 4bytes _Ptr _Rep 4bytes 4bytes p0 p1 v0 X _Ptr _Rep _Ref_cnt_base 4bytes 4bytes 16 bytes p2 shared_ptr<X> p0(new X); shared_ptr<X> p1(p0); shared_ptr<void> v0(p0); shared_ptr<X> p2(new X); Note: 16 byte overhead per object, 4 byte overhead per pointer (may be compiler dependent) v0.get() returns _Ptr (X *)
  • 34. shared_ptr<T> pointing to objects on heap, stack, static memory class X{ public: X(const string &); // constructor X(const X &); // copy constructor ~X(); // destructor }; // global object in static memory X xg2(&quot;def&quot;); // global shared X pointer in static memory shared_ptr<X> gsp; // currently null void testSharedPtrToAuto(void) { shared_ptr<X> sp1(new X(&quot;abc&quot;)); // sp1 points to X on heap shared_ptr<X> sp2(&xg2); // sp2 points to xg2, compiles but dangerous!! shared_ptr<X> sp3; // sp3 set to null // sp3 = &xg2; // compiler error X localX(“def”); // one X on stack // gsp = &localX; // compiler error shared_ptr<X> sp4(&localX); // compiles, dangerous, 2 destructor calls to same object!! // when sp2 goes out of scope, it calls the destructor on xg2, bad, crash! }
  • 35. shared_ptr<T> API // comes with an overhead of an extra integer // use it across multiple parties/modules, when there’s no clear ownership of shared data template<class T> class shared_ptr { public: typedef T element_type; shared_ptr(); // never throws // Constructs a shared_ptr that owns the pointer p . template<class Y> explicit shared_ptr(Y * p); // Constructs a shared_ptr that owns the pointer p and the deleter d . template<class Y, class D> shared_ptr(Y * p, D d); ~shared_ptr(); // never throws shared_ptr(shared_ptr const & r); // never throws template<class Y> shared_ptr(shared_ptr<Y> const & r); // never throws template<class Y> explicit shared_ptr(weak_ptr<Y> const & r); // clarify how one type is converted to other template<class Y> explicit shared_ptr(std::auto_ptr<Y> & r); shared_ptr & operator=(shared_ptr const & r); // never throws template<class Y> shared_ptr & operator=(shared_ptr<Y> const & r); // never throws template<class Y> shared_ptr & operator=(std::auto_ptr<Y> & r); void reset(); // never throws template<class Y> void reset(Y * p); template<class Y, class D> void reset(Y * p, D d); T & operator*() const; // never throws T * operator->() const; // never throws T * get() const; // never throws bool unique() const; // never throws long use_count() const; // never throws operator unspecified-bool-type() const; // never throws void swap(shared_ptr & b); // never throws };
  • 36. Loops with shared pointers struct L { Data d; shared_ptr<M> mptr; }; struct M { Stuff s; shared_ptr<L> lptr; }; p1.use_count() -> 2 lptr.use_count() -> 2 mptr.use_count() -> 1 p1 d mptr s lptr L M In case p1 goes out of scope, you lose your only handle to these two structures, and they are left in memory with no way of ever freeing them! p1 d mptr s lptr L M p1.use_count() -> 0 lptr.use_count() -> 1 mptr.use_count() -> 1
  • 37. weak_ptr struct L { Data d; shared_ptr<M> mptr; }; struct M { Stuff s; weak_ptr <L> lptr; }; p1.use_count() -> 1 lptr.use_count() -> 1 mptr.use_count() -> 1 p1 d mptr s lptr L M p1 d mptr s lptr L M p1.use_count() -> 0 lptr.use_count() -> 0 mptr.use_count() -> 1 d mptr s lptr L M lptr.use_count() -> 0 mptr.use_count() -> 0 s lptr L M The weak_ptr class template stores a &quot;weak reference&quot; to an object that's already managed by a shared_ptr .
  • 38. weak_ptr behavior shared_ptr<X> p1(new X); shared_ptr<X> p2 = p1; weak_ptr<X> w0 = p1; w0.unique() -> false w0.use_count() -> 2 w0 -> true p1 X 1 p2 w0 weak_ptr<X> w1 = w0; w0.unique() -> false w0.use_count() -> 2 p1 X 2 p2 w0 w1 // p1 goes out of scope w0.unique() -> true w0.use_count() -> 1 p1 X 3 p2 w0 w1 // w1 goes out of scope w0.unique() -> true w0.use_count() -> 1 X 4 p2 w0 w1 // p2 goes out of scope w0.unique() -> ?? w0.use_count() -> 0 w0.expired() -> true w0 -> false X 5 p2 w0 You cannot have a weak pointer pointing to an object that’s not pointed to by a shared_ptr. w0 X this cannot happen A weak_ptr will never call the destructor of the object it points to. Use the weak pointer as an observer of data owned and managed by shared pointer(s).
  • 39. weak_ptr behavior shared_ptr<X> p1(new X); weak_ptr<X> w0 = p1; p1.unique() -> true w0.use_count() -> 1 w0 -> true p1 X 1 w0 p1.unique() -> false w0.use_count() -> 2 p1 X 2 p3 w0 //shared_ptr<X> p2 = w0; // error shared_ptr<X> p3(w0); // explicit // or shared_ptr<X> p3 = w0.lock(); p1 X 3 p3 w0 shared_ptr<X> p4(new X); p4 X p1 X 4 p3 w0 weak_ptr<X> w1 = p4; p4 X w1 p1 X 5 p3 w0 p1.swap(p4); p4 X w1 p1 X 5 p3 w0 w0.swap(w1); p4 X w1 // w1.lock() == p3 == p4
  • 40. weak_ptr API template<class T> class weak_ptr { public: typedef T element_type; weak_ptr(); template<class Y> weak_ptr(shared_ptr<Y> const & r); weak_ptr(weak_ptr const & r); template<class Y> weak_ptr(weak_ptr<Y> const & r); ~weak_ptr(); weak_ptr & operator=(weak_ptr const & r); template<class Y> weak_ptr & operator=(weak_ptr<Y> const & r); template<class Y> weak_ptr & operator=(shared_ptr<Y> const & r); long use_count() const; bool expired() const; shared_ptr<T> lock() const; void reset(); void swap(weak_ptr<T> & b); };
  • 41.
  • 42. Recap C-style arrays; Overloading operators new[] and delete[] in a plain old data class (POD) class X{ public: int luba; static char BUF[10000]; static int cnt; void set(int ii) { luba = ii;} void * operator new (size_t sz){ void * retval = &BUF[cnt]; cnt += sz; return retval; } void * operator new [] (size_t sz){ void * retval = &BUF[cnt]; cnt += sz; return retval; } void operator delete(void * todel){ // currently is a no-op } void operator delete [] (void * todel){ // currently is a no-op } }; char X::BUF[10000]; int X::cnt = 0; int main(void) { X * x1 = new X; x1->set(1); X * xa = new X[3]; xa[0].set(4); xa[1].set(4); xa[2].set(4); X * xaa = new X[2]; xaa[0].set(7); xaa[1].set(7); delete x1; delete [] xa; delete [] xaa; return 1; } x1 1 4 4 4 7 7 BUF xa[0] xaa[0] New operator calls operator new(4) New operator [3] calls operator new[](12) Heap memory
  • 43. C++ Classes; Overloading operators new[] and delete[] in a class with constructor (non POD) class X{ public: int luba; static char BUF[10000]; static int cnt; void set(int ii) { luba = ii;} void * operator new (size_t sz){ void * retval = &BUF[cnt]; cnt += sz; return retval; } void * operator new [] (size_t sz){ void * retval = &BUF[cnt]; cnt += sz; return retval; } void operator delete(void * todel){ // currently is a no-op } void operator delete [] (void * todel){ // currently is a no-op } X(int ii = 9) : luba(ii){} ~X(void) { luba = -1;} }; char X::BUF[10000]; int X::cnt = 0; int main(void) { X * x1 = new X; x1->set(1); X * xa = new X[3]; xa[0].set(4); xa[1].set(4); xa[2].set(4); X * xaa = new X[2]; xaa[0].set(7); xaa[1].set(7); delete x1; delete [] xa; return 1; } x1 1 3 0 4 4 4 2 0 7 7 BUF New operator [3] calls operator new[](20) Allocates from address A, but returns address B !! Number of elements in xa[] xa[0] Number of elements in xaa[] xaa[0] A B Heap memory
  • 44. array<class T, size_t N> array<X, 3> a; // initializes 3 default Xs array<X, 3> b{“I”, “am“}; // “I” “am” “default_string” array<X, 3> d(b); // copy constructor array<X, 3> e = {X(&quot;how&quot;), X(&quot;are&quot;), X(&quot;you?&quot;)}; array<X, 3> f = {&quot;I&quot;, &quot;am&quot;, &quot;fine.&quot;}; e.swap(f); array<X, 4> g; g.swap(f); // compiler error, bad argument type f.swap(g); // compiler error, bad argument type how are you? I am fine. e f default_string a default_string default_string I b am default_string I d am default_string g f cannot be swapped
  • 45. array<class T, size_t N> It has all the standard iterators T & front() T & back() iterator begin() iterator end() reverse_iterator rend() reverse_iterator rbegin()
  • 46. array<class T, size_t N> array<X, 3> a = {"The", "Seinfeld", "show"}; a.assign("blah"); // error, need X as argument a.assign(X("yadda")); // OK, set all 3 elements a[0]; // returns reference to elem 0 a[3]; // returns reference to non-existing elem – no bounds check a.at(0); // bounds-checked, returns reference to elem 0 a.at(3); // throws std::out_of_range exception a.size(); // returns 3 a.max_size(); // return big number a.empty(); // return false array<X, 0> x; x.empty(); // returns true a[i]; // i is runtime variable The Seinfeld show yadda yadda yadda yadda yadda yadda a[0] a[3] yadda yadda yadda unchecked checked throws
  • 47. array<class T, size_t N> array<X, 3> a = {"The", "Seinfeld", "show"}; // tuple-like element access get<1>(a); // return Seinfeld get<3>(a); // compile time error, out of bounds get<i>(a); // OK, if i has a compile-time value a.data(); // returns the address of a[0] // 2D array array<array<int, 3>, 4> myarr2d2; myarr2d2[3][2] = 3; // 3D array array<array<array<int, 3>, 12>, 33> my3Darray; // etc.
  • 48.
  • 49.
  • 50.
  • 51.
  • 52.
  • 53.
  • 54.
  • 55.
  • 56.
  • 57.
  • 58.
  • 59. Recap: Custom Comparators for STL containers struct Z{ int i, j; Z(int ii, int jj) : i(ii), j(jj) {} bool operator < (const Z & rs) const { if(i < rs.i) return true; if(i == rs.i) return (j < rs.j); return false; } }; void bar() { set<Z> xs; // using Z::op< by default xs.insert(Z(3, 4)); xs.insert(Z(3, 5)); xs.insert(Z(2, 6)); xs.insert(Z(2, 6)); // (2, 6)(3, 4)(3, 5) is the order // one element (2, 6) is dropped (==) } Conditions that the comparator must satisfy: Irreflexivity f(x, x) must be false. Antisymmetry f(x, y) implies !f(y, x) Transitivity f(x, y) and f(y, z) imply f(x, z). Equivalence !f(x, y) and !f(y, x) implies x == y a < a -> false a < b -> !(b < a) a < b && b < c -> a < c !(a < b) && !(b < a) -> a == b
  • 60.
  • 61. Recap: More sophisticated comparator predicates class MyintComparator : public binary_function<int, int, bool> { int referenceArgument_; public: explicit MyintComparator(int refArg) : referenceArgument_(refArg) {} bool operator () (int l, int r) { return std::abs(l - referenceArgument_) < std::abs(r - referenceArgument_); } }; void foo() { std::list<int> myList; myList.push_back(2); myList.push_back(11); myList.push_back(8); myList.sort(MyintComparator(10)); // 11, 8, 2 std::set<int, MyintComparator> mySet(MyintComparator(7)); mySet.insert(2); mySet.insert(11); mySet.insert(8); // 8, 11, 2 // you can also do this with boost bind, see later return; } 2 3 4 5 6 7 8 9 10 11 12 2 3 4 5 6 7 8 9 10 11 12
  • 62. Recap: Mistakes in op < // Bad example struct Z{ int i, j; Z(int ii, int jj) : i(ii), j(jj) {} bool operator < (const Z & rs) const { if(i > rs.i || j > rs.j) return false; return true; } }; Z z1(2, 3), z2(3, 2); // z1 < z2 -> true // z2 < z1 -> true // // Does not satisfy antisymmetry // and Equivalence // // You can have all kinds of issues: // // not finding an inserted element // not getting things back in the correct // order // multiple copies inserted in a set, etc. Say you have a map, which is a red-black tree internally. The tree has the following elements in it: A / B C and you are about to insert element D. The following comparisons will be done: D < A A < D if both are false then it means equality and D is inserted over A if D < A then it will go on the left branch and perform 2 more op< D < B B < D if both are false, then it inserts D over B (or just drops D), if not, it will make further branches left or right from B If the op< is incorrect, then you may insert D and never find it in the set
  • 63. Recap: Mistakes in op < // Bad example struct Z{ int i, j; Z(int ii, int jj) : i(ii), j(jj) {} }; class BadZComp : public binary_function<const Z &, const Z &, bool> { public: bool operator() (const Z & l, const Z & r) { return (l.i < r.i || l.i < r.j); // faulty implementation } }; int main(void) { //set<Z> xs; set<Z, BadZComp> xs; xs.insert(Z(3, 4)); xs.insert(Z(3, 5)); xs.insert(Z(2, 6)); set<Z, BadZComp>::iterator sit = xs.find(Z(3, 4)); return 0; } In order to find element Z(3,4) in the set the operator < needs to return !(a < b) && !(b < a) Z(3, 4) < Z(3, 4) will return true, therefore the element will never be found in the set
  • 64. Unordered associative containers: unordered_map, unordered_multimap, unordered_set, unordered_multiset class WWID { public: int wwid; WWID(int i = -1) : wwid(i) {} bool operator == (const WWID & rs) const { return wwid == rs.wwid; } }; class ShoeSize { public: int shoeSize; ShoeSize(int i = 4) : shoeSize(i) {} bool operator == (const ShoeSize & rs) const { return shoeSize == rs.shoeSize; } }; // Notice: no operator < () required on the types above – no ordering is happening struct MyHash { // return values 0, 1 or 2 size_t operator() (const WWID & key) const { return key.wwid % 3; } }; These four types implement the hash tables in TR1. The name hash_table was widely used in legacy code, hence these new names. These types are unordered, no operator < is required on them. Operator == is required on the key. The unordered_map and unordered_set keep unique copies of elements. The unordered_multimap and unordered_multiset keep multiple elements with matching keys – lumped together in groups.
  • 65. unordered_map const MyHash myhash; // one object // key is WWID, type is ShoeSize unordered_map<WWID, ShoeSize, MyHash> map2( 3, myhash); // need 3 buckets, use myhash cout << map2.bucket_count() ; // prints 8 (upsizes to power of 2 >= 8) cout << map2.max_bucket_count() ; // prints 8 cout << map2.bucket_size(0); // prints 0 cout << map2.size(); // prints 0 – number of element in the unordered_map 0 1 2 3 4 5 6 7 bucket_count() max_bucket_count() bucket_size(0) == 0
  • 66. unordered_map 0 (0, 5) 1 2 3 4 5 6 7 map2.insert(pair<WWID, ShoeSize>(WWID(0), ShoeSize(5))); // bs(0):1 bs(1):0 bs(2):0 size:1 b_cnt:8 max_b_cnt:8 map2.insert(make_pair(WWID(1), ShoeSize(6))); // bs(0):1 bs(1):1 bs(2):0 size:2 b_cnt:8 max_b_cnt:8 map2.insert(pair<WWID, ShoeSize>( 2, 7 )); // bs(0):1 bs(1):1 bs(2):1 size:3 b_cnt:8 max_b_cnt:8 map2.insert(pair<WWID, ShoeSize>( 3, 8 )); // bs(0):2 bs(1):1 bs(2):1 size:4 b_cnt:8 max_b_cnt:8 (1, 6) (2, 7) (3, 8) Due to the implementation of the MyHash::operator() we will only insert elements in the first 3 buckets, even though the bucket_count of the map is 8 bucket_count()
  • 67. unordered_map 0 (0, 5) 1 2 3 4 5 6 7 struct MyHash2 { // return values 0 thru 8 size_t operator() (const WWID & key) const { return key.wwid % 9; } }; MyHash2 myhash2; unordered_map<WWID, ShoeSize, MyHash2> map2( 3, myhash2); // need 3 buckets, use myhash2 map2.insert(pair<WWID, ShoeSize>(WWID(0), ShoeSize(5))); // bs(0):1 bs(1):0 bs(2):0 bs(7):0 size:1 map2.insert(make_pair(WWID(1), ShoeSize(6))); // bs(0):1 bs(1):1 bs(2):0 bs(7):0 size:2 map2.insert(pair<WWID, ShoeSize>( 2, 7 )); // bs(0):1 bs(1):1 bs(2):1 bs(7):0 size:3 map2.insert(pair<WWID, ShoeSize>( 7, 12 )); // bs(0):1 bs(1):1 bs(2):1 bs(7):1 size:4 map2.insert(pair<WWID, ShoeSize>( 8, 13 )); // bs(0):2 bs(1):1 bs(2):1 bs(7):1 size:5 (1, 6) (2, 7) (8, 13) bucket_count() trying to insert (8, 13) in bucket 8 - unsuccessfully, inserted here instead the hash function returns 8 for (8, 13) but the value is truncated to limit it to buckets 0 thru 7 value &= 0x07; // done automatically for you // having the bucket_count at power of 2 // comes in handy here (7, 12) 8
  • 68. unordered_map 0 (0, 5) 1 2 3 4 5 6 7 cout << map2.load_factor(); // average load factor for a bucket; prints .625 (size()/bucket_count()) cout << map2.max_load_factor(); // prints 4 map2.max_load_factor(0.5); // sets new target load factor map2.rehash(8); // rehash such that the load factor does not exceed target load factor, add new buckets if needed map2.max_load_factor(4); // sets target load factor back to 4 map2.rehash(8); // rehash, get back to original state, get at least 8 buckets (1, 6) (2, 7) (8, 13) bucket_count() == 8 (7, 12) 0 (0, 5) 1 2 3 4 5 6 7 (1, 6) (2, 7) bucket_count() == 16 (7, 12) 8 (8, 13) 15 max_load_factor(0.5) rehash(8) load_factor() == 0.625 (5/8) load_factor() == 0.3125 (5/16) … max_load_factor(4) rehash(8)
  • 69. unordered_map 0 (0, 5) 1 2 3 4 5 6 7 struct MyHash { // return values 0, 1 or 2 size_t operator() (const WWID & key) const { return key.wwid % 3; } } myhash; unordered_map<WWID, ShoeSize, MyHash> map2( 3, myhash); // insert the same 5 elements into map2 cout << map2.load_factor(); // average load factor for a bucket; prints .625 (size()/bucket_count()) cout << map2.max_load_factor(); // prints 4 map2.max_load_factor(0.5); // sets new target load factor map2.rehash(8); // rehash such that the load factor does not exceed target load factor, add new buckets if needed (1, 6) (2, 7) (8, 13) bucket_count() == 8 (7, 12) rehash(8) load_factor() == 0.625 (5/8) 0 (0, 5) 1 2 3 4 5 6 7 (1, 6) (2, 7) (8, 13) bucket_count() == 16 (7, 12) load_factor() == 0.3125 (5/16) 15 Elements are not moved to different buckets because the hash function returns only values 0, 1, 2. We did get a lower load factor but it’s not useful: all elements will still be in buckets 0, 1, 2.
  • 70. unordered_map (0, 5) (1, 6) (2, 7) (8, 13) (7, 12) (12, 7) (0, 5) (1, 6) (2, 7) (8, 13) (7, 12) (12, 7) map2.begin() internally implemented as one big list (14, 6) (11, 17) (33, 5) (14, 6) (11, 17) (33, 5) map2.end() map2.begin(0) map2.begin(1) map2.end(0) map2.end(2) map2.end(1) map2.begin(2) map2.begin(3) map2.end(3) unordered_map<WWID, ShoeSize, MyHash> ::iterator unordered_map<WWID, ShoeSize, MyHash> ::local_iterator 0 1 2 3 0 1 2 3
  • 71. unordered_map 0 (0, 5) 1 2 struct MyHash { // return values 0, 1 or 2 size_t operator() (const WWID & key) const { return key.wwid / 10; } } myhash; unordered_map<WWID, ShoeSize, MyHash> map2( 8, myhash); // insert (12,7), (0,5), (7,12), (1, 6), (8,13), (2,7) map2.insert(make_pair(WWID( 2 ), ShoeSize( 7 ))); // attempting to re-insert (2,7) (1, 6) (2, 7) (8, 13) (7, 12) (12, 7) 0 (0, 5) 1 2 (1, 6) (2, 7) (8, 13) (7, 12) (12, 7) _list.begin() _list.end() internally implemented as one list; when we try to insert (2,7) for the second time, it searches backward within the list-portion of bucket 0 searches this portion of list the insert attempt returns an iterator pointing to original (2,7) paired with a bool false denoting the fact that the element was already in the map
  • 72. unordered_map 0 (0, 5) 1 2 // returns an iterator to element (8, 13) unordered_map<WWID, ShoeSize, MyHash> ::iterator mit = map2.find(8); // passing mit as hint iterator to speed up insertion map2.insert( mit, make_pair(WWID( 1), ShoeSize(7))); (1, 6) (2, 7) (8, 13) (7, 12) (12, 7) mit backit forwit compiler dependent usage of hint iterator mit could do 2 finger approach to search away from mit (Dinkumware didn’t use the hint though..) will find already existing element (1, 6) and skip re-inserting returns pair (result, false) – the keys of (1, 6) and (1, 7) match, insert will look for key 1, will find (1, 6) and not insert (1,7) over (1,6) result
  • 73. unordered_multiset 0 1 2 unordered_multiset<WWID, MyHash> set1; set1.insert(2); set1.insert(3); set1.insert(2); set1.insert(3); set1.insert(11); unordered_multiset<WWID, MyHash>::iterator i = set1.find( 2 ); // find one element with matching value // return an iterator pair ranging all the elements with matching values pair<unordered_multiset<WWID, MyHash>::iterator, unordered_multiset<WWID, MyHash>::iterator> itpair = set1.equal_range(3); for(unordered_multiset<WWID, MyHash>::iterator i1 = itpair.first; i1 != itpair.second; ++i1){ cout << &quot;i1 &quot; << *i1 << endl; } set1.insert(itpair.first, 3); // inserting elem 3 for the 3 rd time, passing a hint iterator to speed up insertion cout << set1.count(3); // prints 3 3 11 3 2 2 i itpair.first itpair.second ++ ++ 3
  • 74. time complexity of unordered associative containers X a(n, hash_fun); // construct a container with at least n buckets, O(n) X a(first, last, n, hash_fun); // construct a container with at least n buckets, then insert elements in the // iterator range into the container; average O(N), worst O(N^2) - N is the number of elements inserted a.insert(object); // average O(1), worst O(a.size()) a.insert(hint_iterator, object); // average O(1), worst O(a.size()) a.erase(key); // erases element with matching key; average O(a.count(key)), worst O(a.size()) a.erase(it1, it2); // erases elements between the 2 iterators; average O(distance(it1, it2)), worst O(a.size()) a.clear(); // removes all elements; O(a.size()) a.find(k); // find element with matching key; average O(1), worst O(a.size()) a.count(k); // count elements with matching key; average O(1), worst O(a.size()) a.equal_range(k); // return a pair of iterators spanning the range with matching key; average O(a.count(k)), worst O(a.size()) a.bucket_size(n); // O(M) where M is the number of objects in bucket n The ordered associative containers set, multiset, map, multimap use a red-black tree for their implementation; their insertion and query times are O(log(N)). The unordered associative containers have average insertion and query times of O(1) – but they need careful design and babysitting – or else they can deteriorate to O(n).
  • 75. mem_fn (member function) Function Template // File #include <iostream.h> void g(int i) { cout << i ;} class XYZ{ public: int i; void luba(int ii); // non-static member function }; void XYZ::luba(int ii) { i = ii; cout << this << “,” << i << endl; } /* File t2.c */ #include <stdio.h> /* declare a mangled C++ global function */ void g__Fi(int); /* declare a mangled C++ member function */ void luba__3XYZFi(void *, int); void f(void) { int j = 66; g__Fi(j); /* will call g() from */ void * vp = malloc(100); /* will call void XYZ::luba(int); */ luba__3XYZFi(vp, j); /* passing vp to */ /* become this pointer ! */ } // nm t1.o will report: // Name value scope type subspace // g__Fi | 0| extern| entry| $CODE$ // luba__3XYZFi| 0| extern| entry| $CODE$ /* nm t2.o will report */ /* f | 0| extern| entry| $CODE$ */ /* g__Fi | | undef| code| */ /* luba__3XYZFi| | undef| code| */ C++ file C file Example of calling C++ member function from C code. Educational purpose only. Do not do this at home!
  • 76. mem_fn (member function) Function Template class AA{ int i; public: AA(int j = 0) : i(j) {} static int f0(double d) { return static_cast<int>(d);} int f1(double d) const {return static_cast<int>(d * i);} const AA operator + (const AA & rs) const { AA retval; retval.i = i + rs.i; return retval; } int geti(void) const { return i; } }; int foo(const AA & a) { return a.geti();} void mem_fn_test(void) { int (* fooptr) (const AA &) = foo; // just a pointer to a global function int (AA:: * f1ptr)(double)const = &AA::f1; // pointer to a member function of AA // pointer to a member operator of AA const AA (AA:: * operatorPlusPtr)(const AA &) const = &AA::operator +; int (* f0ptr)(double) = &AA::f0; // pointer to static member function AA a1(13); AA * a2 = new AA(14); shared_ptr<AA> a3(new AA(15)); weak_ptr<AA> a4 = a3; // continued on next page
  • 77. mem_fn (member function) Function Template // continued from prev page mem_fn(f1ptr)(a1, 9.9); // equivalent to a1.f1(9.9); mem_fn(f1ptr)(a2, 11.3); // equivalent to a2->f1(11.3); - notice that pointers and non-pointers mem_fn(f1ptr)(a3, 13.5); // equivalent to a3->f1(13.5); - are handled with same syntax // mem_fn(f1ptr)(a4, 15.1); // equivalent to a4->f1(15.1); // doesn't compile (f0ptr)(8.8); // ok – calling a static member function, does not need an object address for this (f1ptr)(8.8); // error - term does not evaluate to a function taking 1 argument AA a5 = mem_fn(operatorPlusPtr)(a2, a1); // equivalent to a5 = (*a2) + a1 AA a6 = mem_fn(operatorPlusPtr)(a1, *a2); // equivalent to a6 = a1 + (*a2) AA a7 = mem_fn(operatorPlusPtr)(a2, *a3); // equivalent to a7 = (*a2) + (*a3) AA a8 = mem_fn(operatorPlusPtr)(a3, *a2); // equivalent to a8 = (*a3) + (*a2) }
  • 78. The reference_wrapper Class Template class RR{ // sizeof(RR) == 4 public: int & ir; RR(int & j) : ir(j) {} }; int b; // some global int class PP{ // sizeof(PP) == 4 public: reference_wrapper<int> irw; PP(int & k) : irw(k) { irw = b; // error } }; void test_reference_wrapper(void) { int ii(11), jj(12); RR r1(ii), r2(jj); r1 = r2; // error, don’t know how to copy references PP p1(ii), p2(jj); cout << p1.irw.get() ; // prints 11 p1 = p2; cout << p1.irw.get() ; // prints 12 }
  • 79. The reference_wrapper Class Template class AA{ int i; public: AA(int j = 0) : i(j) {} static int f0(double d) { return static_cast<int>(d);} int f1(double d) const {return static_cast<int>(d * i);} const AA operator + (const AA & rs) const { AA retval; retval.i = i + rs.i; return retval; } int geti(void) const { return i; } }; int foo(const AA & a) { return a.geti();} int bar(const AA & a) { return a.geti() + 2; } void test_reference_wrapper 2 (void) { typedef int (* const myfuntype) (const AA &); reference_wrapper<myfuntype> fooref(&foo); // creating a reference wrapper to foo() reference_wrapper<int (* const) (const AA &)> barref(&bar) ; // ref wrapper to bar() AA a1(12); fooref(a1); // calling foo barref(a1); // calling bar /* supposed to work but doesn't fooref = cref(bar); fooref(a1); // calling bar */ reference_wrapper<int (AA:: * const)(double)const> memref(&AA::f1); mem_fn(memref.get())(a1, 8.7); // calling a member function via the reference wrapper }
  • 80. The function Class Template class AA{ int i; public: AA(int j = 0) : i(j) {} }; int foo(const AA & a) { cout << &quot;foo&quot; << endl; return a.geti();} int bar(const AA & a) { cout << &quot;bar&quot; << endl; return a.geti();} void duba(void) { cout << &quot;in dubai&quot;; } void test_function(void) { function<void ()> fd(duba); // constructing a function object fd(); // call duba typedef int (myfuntype) (const AA &); // typedef of a function (not a function pointer) function<myfuntype> fun1; // defining empty function AA a1(12); fun1 = foo; // assigning a global function to fun1 fun1(a1); // calling fun1 with argument a1 fun1 = bar; // fun1(a1); // calling bar }
  • 81. The function Class Template class YY{ public: int _y; YY(int y) : _y(y) {} int operator () () const { cout << "In YY::op(), this: " << hex << this << dec << ""; return 0 ; } }; int yme() { cout << "In yme" << endl; return 5;} void fxx(void) {} void test_function(void) { YY y1(13); // one object function<int ()> fun2; // function object returning int, taking no arguments fun2 = y1; // copy of y1 made fun2(); // call y1.op() – operates on a copy of y1 – 13 y1._y = 55; // modifying y1 fun2(); // still operates on the original copy of y1 – 13 (not 55 !) cout << "sizeof fun2 " << sizeof(fun2) << endl; // prints 4 fun2 = YY(33); cout << (bool)(fun2.target<YY>()) << endl; // true cout << (bool)(fun2.target<int ()>()) << endl; // false fun2(); // calling YY::op() on another object cout << "fun2.target_type().name() " << fun2.target_type().name() << endl; // prints class YY // fun2.target_type() returns a reference to type_info object fun2 = yme; fun2(); // calling yme cout << "fun2.target_type().name() " << fun2.target_type().name() << endl; // prints int (__cdecl *)(void) cout << "<YY> " << (bool)(fun2.target<YY>()) << endl; // false cout << "<yme> " << (bool)(fun2.target<int ()>()) << endl; // true ?? why not?? //fun2 = fxx; // error, non-matching type }
  • 82. The function Class Template void test_function(void) { YY y3(33), y4(44); function<int ()> fun3, fun4; cout << &quot;about to fun3 = y3&quot; << endl; fun3 = y3; // y3 copied by value cout << &quot;about to fun4 = y4&quot; << endl; fun4 = y4; // y4 copied by value cout << &quot;calling fun3()&quot; << endl; fun3(); cout << &quot;calling fun4()&quot; << endl; fun4(); cout << &quot;fun3 = 0&quot; << endl; fun3 = 0; // setting fun3 to zero fun3.swap(fun4); // swapping the 2 functions, fun3 holds copy of y4, fun4 is zero if(fun3) {cout << &quot;calling fun3()&quot; << endl; fun3(); } // executed on copy of y4 if(fun4) {cout << &quot;calling fun4()&quot; << endl; fun4();} // if(fun4) evaluates to false, no call }
  • 83. The bind Function Template // Recap std:: bind1st, bind2nd // creating a predicate class myLessThan : public binary_function<int, int, bool> { public: bool operator () (int a, int b) const { return a < b; } }; bool myLessThan3(int a) { return a < 3; } void test_std_bind() { array<int, 4> arr = {1, 2, 3, 4}; int k ; count_if(arr.begin(), arr.end(), myLessThan3, k); // sets k to 2 count_if(arr.begin(), arr.end(), bind2nd(myLessThan(), 3), k); // (*it < 3) sets k to 2 count_if(arr.begin(), arr.end(), bind1st(myLessThan(), 3), k); // (3 < *it) sets k to 1 }
  • 84. The bind Function Template bool myLessThan3(int a) { return a < 3; } bool myLessThanF(int a, int b) { return a < b; } using namespace std::tr1::placeholders; // for _1, _2, etc void test_bind() { bind(myLessThan3, _1)(4); // 4 < 3 false bind(myLessThan3, 4)(); // 4 < 3 false bind(myLessThan3, 2)(); // 2 < 3 true bind(myLessThanF, 4, 5 )(); // myLessThanF (4, 5) returns true bind(myLessThanF, 4, _1)(5); // myLessThanF (4, 5) returns true bind(myLessThanF, _1, _2)(4, 5); // myLessThanF (4, 5) returns true bind(myLessThanF, _2, _1)(4, 5); // myLessThanF ( 5, 4 ) returns false bind(myLessThanF, _1, 5)(4); // myLessThanF (4, 5) returns true // using runtime values int val1; cout << &quot;val1: &quot;; cin >> val1; int val2; cout << &quot;val2: &quot;; cin >> val2; b = bind(myLessThanF, val1, _1)(val2); // will perform myLessThanF(val1, val2) }
  • 85. The bind Function Template bool myLessThanF(int a, int b) { return a < b; } void test_bind() { array<int, 4> arr = {1, 2, 3, 4}; int k = 0; count_if(arr.begin(), arr.end(), bind(myLessThanF, 3, _1), k); // k set to 1 // 3<1f 3<2f 3<3f 3<4t // 1 2 3 4 count_if(arr.begin(), arr.end(), bind(myLessThanF, _1, 3), k); // k set to 2 // 1<3t 2<3t 3<3f 4<3f // 1 2 3 4 }
  • 86. The bind Function Template bool inBetweenF(int l, int m, int r) { return (l <= m && m <= r); } void test_bind() { inBetweenF(7, 8, 9) ; // call (7, 8, 9) -> true bind(inBetweenF, _1, _2, _3)(7, 8, 9) ; // call (7, 8, 9) -> true bind(inBetweenF, _1, _3, _2)(7, 8, 9) ; // call (7, 9, 8) -> false bind(inBetweenF, 7, _1, _2) (8, 9) ; // call (7, 8, 9) -> true bind(inBetweenF, 7, 7, _1) (8) ; // call (7, 7, 8) -> true bind(inBetweenF, 7, _1, 8) (7) ; // call (7, 7, 8) -> true bind(inBetweenF, _1, 7, 9) (6) ; // call (6, 7, 9) -> true bind(inBetweenF, _1, _1, _1)(10, 11, 12) ; // call (10, 10, 10) -> true bind(inBetweenF, _3, _4, _4)(11, 22, 33, 44) ; // call (33, 44, 44) -> true } notice 4 arguments
  • 87. The bind Function Template bool inBetweenF(int l, int m, int r) { return (l <= m && m <= r); } void test_bind() { array<int, 4> arr = {1, 2, 3, 4}; int k = 0; count_if(arr.begin(), arr.end(), bind(inBetweenF, 1, 2, _1), k); // k set to 3 // (1,2, 1 )f (1,2, 2 )t (1,2, 3 )t (1,2, 4 )t // 1 2 3 4 count_if(arr.begin(), arr.end(), bind(inBetweenF, 1, _1, 4), k); // k set to 4 // (1, 1 ,4)t (1, 2 ,4)t (1, 3 ,4)t (1, 4 ,4)t // 1 2 3 4 count_if(arr.begin(), arr.end(), bind(inBetweenF, 2, _1, 3), k); // k set to 2 // (2, 1 ,3)f (2, 2 ,3)t (2, 3 ,3)t (2, 4 ,3)f // 1 2 3 4 count_if(arr.begin(), arr.end(), bind(inBetweenF, _1, 2, 4), k); // k set to 2 // ( 1 ,2,4)t ( 2 ,2,4)t (3,2,4)f (4,2,4)f // 1 2 3 4 }
  • 88. The bind Function Template class WW{ public: int w_; WW(int w = 0) : w_(w) {} bool amIinBetween(int l, int r) { return (l <= w_) && (w_ <= r); } int operator () (int i) const { return i; } typedef int result_type; }; class TT{ public: int t_; TT(int t = 0) : t_(t) {} int foo(int i, int j) const { return i + j + t_; } }; void test_bind (void) { array<WW, 3> war = {1, 2, 3}; int k = 0; count_if(war.begin(), war.end(), bind(&WW::amIinBetween, _1, 2, 4), k); // k set to 2 // testing bind chaining WW w1(11), w2(33); w1(3); // call w1.op()(3) TT t1(13); bind(w1, 22)(); // equivalent to w1(22) bind(w2, bind(w1, 7)())(); // eq to w2(w1(7)) bind(&TT::foo, t1, 5, 6)(); // calls t1.foo(5, 6) bind(&TT::foo, _3, _2, _1)(5, 6, t1); // calls t1.foo(6, 5) // supposed to work but doesn't compile //bind(&TT::foo, t1, h, bind(w1, _1))(2); //bind(&TT::foo, t1, bind(w1, _2), bind(w2, _1))(27, 37); // call t1.foo(w1(37), w2(27)) } binding a non-static member function of class WW to fixed arguments 2 and 4; the first argument _1 is the object of type WW the member function is called on
  • 89. The bind Function Template bool inBetweenF(int l, int m, int r) { return (l <= m && m <= r); } bool matchDelta(int l, int m, int r) { return (m == abs(l - r)); } void test_function_bind() { array<int, 3> r0 = { 1, 2, 3}, r1 = {11, 2, 1}; array< array<int, 3>*, 2> a2d = {&r0, &r1}; array<function<bool (int, int, int)>, 2> af; // 2 element array of function objects (empty) af[0] = inBetweenF; af[1] = matchDelta; for(int t = 0; t < 2; ++t){ // iterate twice for(int row = 0; row < 2; ++row){ // iterate on each row int k; count_if(a2d[row]->begin(), a2d[row]->end(), bind(af[row], 2, _1, 4), k); } af[0].swap(af[1]); } } 1 2 3 11 2 1 a2d inBetweenF matchDelta 2, 1 ,4 2, 2 ,4 2, 3 ,4 2, 1 ,4 2, 2 ,4 2, 3 ,4 inBetweenF matchDelta 2, 11 ,4 2, 2 ,4 2, 1 ,4 2, 11 ,4 2, 2 ,4 2, 1 ,4
  • 90. The bind Function Template function<bool (int, float, char, double)> f4p; function<bool (int, float, char)> f3p; function<bool (int, float)> f2p; function<bool (int)> f1p; f4p = fooo4; f4p(3, 5.5, 'w', 9.9); f3p = fooo3; f3p(3, 5.5, 'w'); f2p = fooo2; f2p(3, 5.5); f1p = fooo1; f1p(3); bind(f4p, _1, 7.7, _2, 9.9)(5, 'w'); bind(f4p, 5, 7.7, _2, _1)(9.8, 'w'); f3p = bind(f4p, _1, _2, _3, 11.11); f3p(3, 5.5, 'w'); f2p = bind(f4p, _1, _2, 'w', 13.13); f2p(5, 6.6); f2p = bind(f3p, _1, _2, 'w'); f2p(7, 8.8); f1p = bind(f4p, _1, 7.7, 'w', 9.9); f1p(5); f1p = bind(f3p, _1, 7.7, 'w'); f1p(5); f1p = bind(f2p, _1, 7.7); f1p(5); function <bool (double)> f1pd; f1pd = bind(f4p, 5, 7.7, 'w', _1); f1pd(9.9); bool fooo4(int i, float j, char c, double d) {/* */} bool fooo3(int i, float j, char c) {/* */} bool fooo2(int i, float) {/* */} bool fooo1(int i) {/* */} bool fooo4( int i, float j, char c, double d ) bind(f4p, _1, 7.7, _2, 9.9) (5, 'w'); _1 _2 i j c d
  • 91. Performance C++ : inlining C++ code tends to have many, many small functions (many more than C): constructors, destructors, operators, member access functions, etc. Function call overhead may dominate all your runtime without you being aware of it. Inlining is the first and most important runtime optimization you need to consider! class X{ public: X() {} // default constructor X(int i) {} // constructor taking one int as argument ~X() {} // destructor X & operator = (const X & rs) {} // assignment bool operator == (const X & rs) const {} // comparison operator bool operator != (const X & rs) const {} // comparison operator X & operator ++ ( ) {} // pre-increment X operator ++ (int) {} // post-increment // … }; int main(void) { for(X x1(0); x1 != X(100); x1++){ X temp[10]; // 1ct + 100*(1comp + 1ct + 1dt + 1ct + 1dt + 1post_incr + //… // 10ct + 1ass + 10dt) = 2701 function calls temp[i] = x1; } return 0; } For this example, if you don’t have good inlining, your code will be very slow!
  • 92. inlining – case study1 // declaration int myMax(int a, int b); // definition int myMax(int a, int b) { return a > b ? a : b; } long foo(int v1, int v2) { long k = 0; for(int i = 0; i < v1; ++i) for(int j = 0; j < v2; ++j) k += myMax(i, j); return k; } int main(void) { foo(500000, 10000); return 0; } .globl _Z5 myMax ii .type _Z5myMaxii, @function _Z5myMaxii: .LFB2: .file 1 &quot;; .loc 1 8 0 pushq %rbp .LCFI0: movq %rsp, %rbp .LCFI1: movl %edi, -4(%rbp) movl %esi, -8(%rbp) .loc 1 10 0 movl -4(%rbp), %eax cmpl -8(%rbp), %eax jle .L2 movl -4(%rbp), %eax movl %eax, -12(%rbp) jmp .L3 .L2: movl -8(%rbp), %eax movl %eax, -12(%rbp) .L3: movl -12(%rbp), %eax .loc 1 11 0 leave ret g++ elapsed time: 32.85 0000000000000032 T _Z3fooii 0000000000000000 T _Z5myMaxii .globl _Z3 foo ii .type _Z3fooii, @function _Z3fooii: .LFB3: .loc 1 14 0 pushq %rbp .LCFI2: movq %rsp, %rbp .LCFI3: subq $24, %rsp .LCFI4: movl %edi, -20(%rbp) movl %esi, -24(%rbp) .LBB2: .loc 1 16 0 movq $0, -16(%rbp) .LBB3: .loc 1 17 0 movl $0, -8(%rbp) jmp .L6 .L9: .LBB4: .loc 1 18 0 movl $0, -4(%rbp) jmp .L7 .L8: .loc 1 19 0 movl -4(%rbp), %esi movl -8(%rbp), %edi call _Z5myMaxii cltq addq %rax, -16(%rbp) .loc 1 18 0 addl $1, -4(%rbp) .L7: movl -4(%rbp), %eax cmpl -24(%rbp), %eax jl .L8 .LBE4: .loc 1 17 0 addl $1, -8(%rbp) .L6: movl -8(%rbp), %eax cmpl -20(%rbp), %eax jl .L9 .LBE3: .loc 1 20 0 movq -16(%rbp), %rax .LBE2: .loc 1 22 0 leave ret myMax function body call to myMax
  • 93. inlining – case study1 // declaration inline int myMax(int a, int b); // definition int myMax(int a, int b) { return a > b ? a : b; } long foo(int v1, int v2) { long k = 0; for(int i = 0; i < v1; ++i) for(int j = 0; j < v2; ++j) k += myMax(i, j); return k; } int main(void) { foo(500000, 10000); return 0; } .globl _Z5 myMax ii .type _Z5myMaxii, @function _Z5myMaxii: .LFB2: .file 1 &quot;; .loc 1 8 0 pushq %rbp .LCFI0: movq %rsp, %rbp .LCFI1: movl %edi, -4(%rbp) movl %esi, -8(%rbp) .loc 1 10 0 movl -4(%rbp), %eax cmpl -8(%rbp), %eax jle .L2 movl -4(%rbp), %eax movl %eax, -12(%rbp) jmp .L3 .L2: movl -8(%rbp), %eax movl %eax, -12(%rbp) .L3: movl -12(%rbp), %eax .loc 1 11 0 leave ret g++ elapsed time: 22.94 0000000000000020 T _Z3fooii 0000000000000000 T _Z5myMaxii _Z3fooii: .LFB3: .loc 1 14 0 pushq %rbp .LCFI2: movq %rsp, %rbp .LCFI3: movl %edi, -36(%rbp) movl %esi, -40(%rbp) .LBB7: .loc 1 16 0 movq $0, -24(%rbp) .LBB8: .loc 1 17 0 movl $0, -16(%rbp) jmp .L6 .L11: .LBB9: .loc 1 18 0 movl $0, -12(%rbp) jmp .L7 .L10: movl -16(%rbp), %eax movl %eax, -4(%rbp) movl -12(%rbp), %eax movl %eax, -8(%rbp) .LBB10: .LBB11: .loc 1 10 0 movl -4(%rbp), %eax cmpl -8(%rbp), %eax jle .L8 movl -4(%rbp), %eax movl %eax, -44(%rbp) jmp .L9 .L8: movl -8(%rbp), %eax movl %eax, -44(%rbp) .L9: movl -44(%rbp), %eax .LBE11: .LBE10: .loc 1 19 0 cltq addq %rax, -24(%rbp) .loc 1 18 0 addl $1, -12(%rbp) .L7: movl -12(%rbp), %eax cmpl -40(%rbp), %eax jl .L10 .LBE9: .loc 1 17 0 addl $1, -16(%rbp) .L6: movl -16(%rbp), %eax cmpl -36(%rbp), %eax jl .L11 .LBE8: .loc 1 20 0 movq -24(%rbp), %rax .LBE7: .loc 1 22 0 leave ret myMax function body body of myMax inserted into code no more call to myMax
  • 94. inlining – case study1 // declaration inline int myMax(int a, int b); // definition int myMax(int a, int b) { return a > b ? a : b; } long foo(int v1, int v2) { long k = 0; for(int i = 0; i < v1; ++i) for(int j = 0; j < v2; ++j) k += myMax(i, j); return k; } int main(void) { foo(500000, 10000); return 0; } g++ -O2 elapsed time: 6.26 0000000000000000 T _Z3fooi .globl _Z3fooii .type _Z3fooii, @function _Z3fooii: .LFB3: .file 1 &quot;; .loc 1 14 0 .LVL0: .LBB7: .LBB8: .LBB9: .loc 1 18 0 xorl %r8d, %r8d .LVL1: .LBE9: .loc 1 17 0 xorl %ecx, %ecx .LVL2: testl %edi, %edi jle .L5 .LVL3: .p2align 4,,10 .p2align 3 .L3: xorl %edx, %edx .LVL4: .LBB10: .loc 1 18 0 testl %esi, %esi jle .L7 .p2align 4,,10 .p2align 3 .L9: .loc 1 19 0 cmpl %ecx, %edx movl %ecx, %eax cmovge %edx, %eax .loc 1 18 0 addl $1, %edx .LVL5: .loc 1 19 0 cltq addq %rax, %r8 .loc 1 18 0 cmpl %edx, %esi jg .L9 .L7: .LBE10: .loc 1 17 0 addl $1, %ecx .LVL6: cmpl %ecx, %edi jg .L3 .L5: .LBE8: .LBE7: .loc 1 22 0 movq %r8, %rax ret body of myMax inserted into code no more call to myMax myMax function doesn’t even show up in the symbol table
  • 95. inline – comparing various options 6 11 22 22 32 32 Runtime No Yes Yes Yes Yes Yes does the object file contain the function? Yes No Yes Yes No No did compiler inline the function? No No Yes Yes No No __attribute__ ((always_inline)) compiler directive No Yes Yes No No No --no-inline compiler option Yes Yes No No Yes No inline keyword Yes Yes No No No No -O2
  • 96. inlining – case study2 // t.h #define II inline //#define II class X{ private: int i_; public: X(int i); ~X(); bool operator > (const X & rs) const; X & operator = (const X & rs); X & operator ++ (void); int i(void) const; }; II X::X(int i) : i_(i) {} II X::~X() {} II bool X::operator > (const X & rs) const { return i_ > rs.i_; } II int X::i(void) const { return i_; } II X & X::operator ++(void) { ++i_; return * this; } // declaration II int myMax(X a, X b); // definition int myMax(X a, X b) { return a > b ? a .i() : b.i(); } #ifdef II #undef II #endif g++ -O2 nm t.o 0000000000000000 T _Z3foo1XS_ runtime 2.3 // #include &quot;t.h&quot; long foo(X v1, X v2) { long k = 0; for(X i = 0; !(i > v1); ++i) for(X j = 0; !(j > v2); ++j) k += myMax(i, j); return k; } // #include &quot;t.h&quot; long foo(X v1, X v2); int main(void) { long res = foo(X(200000), X(10000)); return 0; } g++ nm t.o 00000000000000cd T _Z3foo1XS_ 000000000000008d T _Z5myMax1XS_ 0000000000000016 T _ZN1XC1Ei 0000000000000000 T _ZN1XC2Ei 0000000000000036 T _ZN1XD1Ev 000000000000002c T _ZN1XD2Ev 0000000000000070 T _ZN1XppEv 0000000000000060 T _ZNK1X1iEv 0000000000000040 T _ZNK1XgtERKS runtime 37.46 16x speedup!
  • 97. inlining – case study 3 class Rectangle{ public: int xl, yl, xh, yh; }; void foo(Rectangle r1) { r1.xl = r1.yh; } .type _Z3foo9Rectangle, @function _Z3foo9Rectangle: .LFB2: pushq %rbp .LCFI0: movq %rsp, %rbp .LCFI1: movq %rdi, -16(%rbp) movq %rsi, -8(%rbp) movl -4(%rbp), %eax movl %eax, -16(%rbp) leave ret stack pointer yh Return Address 0 -4 xh -8 yl -12 xl -16 The assignment in question is executed in 2 assembly instructions Very fast! As fast as C. eax register
  • 98. inlining – case study 3 class D1D{ private: int v_; // 0 - LOW, 1 - HIGH public: D1D(int i) : v_(i) {} operator int () { return v_;} }; class D2D{ private: int v_; // 0 WEST, 1 EAST, 2 SOUTH, 3 NORTH public: D2D(int v) : v_(v) {} operator D1D () { return D1D(v_ & 1); } bool isVertical() { return (v_ >> 1); } operator int (void) { return v_;} }; const D1D LOW(0), HIGH(1); const D2D WEST(0), EAST(1), SOUTH(2), NORTH(3); class Interval{ private: int v_[2]; public: int & get(D1D d) { return v_[d]; } Interval(int l, int h) { v_[0] = l; v_[1] = h; } }; class Rectangle{ private: Interval i_[2]; public: int & get(D2D d) { return i_[d.isVertical()].get(d); } }; void foo(Rectangle r1) { r1.get(WEST) = r1.get(NORTH); } isVertical Interval::get D1D::op int() D2D::op D1D() The foo () function can be made almost as fast as the one in the previous page, provided that all the functions inline. Otherwise, could be much slower!
  • 99. inlining – migration f1 f2 10 f3 10 f4 4 f5 4 f6 6 small small small small big big f inlined function f out of line function problem (4 calls) f1 f2 10 f3 10 f4 4 f5 4 f6 6 small small small small big big problem (10 calls) // force f4 to be inlined inline void f4() __attribute__ ((always_inline)) the compiler was forced to inline f4, but in the process it gave up on trying to inline f3, so the code actually became slower!
  • 100.
  • 101. Code vs. Heap vs. Stack vs. Static Memory int a; static int b; extern int x = 3; namespace BB{ int b; } class Z{ public: static int zz; int k; }; int Z::zz; extern int y; // no storage Z z1; void foo() {} void bar() {} int main(void) { int jj; static int s; Z z2; char * cp; cp = new char[100]; Z * zp; zp = new Z; } Static Memory a 0x6013c0 b 0x6013d4 x 0x601298 BB::b 0x6013c4 Z::zz 0x6013c8 zp->zz 0x6013c8 z1 0x6013cc s 0x6013d8 Code Memory foo 0x4009e0 bar 0x4009f0 main 0x400a30 Heap Memory *zp 0x602080 zp->k 0x602080 *cp 0x602010 Stack Memory jj 0x7fbfffeafc z2 0x7fbfffeaf0 cp 0x7fbfffeae8 zp 0x7fbfffeae0 the 4 memory segments can be placed in very different address spaces
  • 102. Performance C++ : Caching int main(void) { list<int> s; vector<int> v; for(int i = 0; i < 50000000; ++i){ s.push_back(i); v.push_back(i); } long ss = 0, sv = 0; for(set<int>::iterator sit = s.begin(); sit != s.end(); ++sit){ ss += *sit; } for(int i = 0; i < v.size(); ++i){ sv2 += v[i]; } return 0; } next prev int next prev int next prev int next prev int next prev control list allocator (heap) of list, all list elements allocated one after each other begin middle end vector 0.54 sec 0.09 sec 6x faster vector is faster, elements are next to each other in memory, good cache performance list is slower (6x) because it needs an extra dereferencing. Elements are in the heap next to each other, but each element is bigger (2 extra pointers), which slows it down (more memory traffic, more cache misses)
  • 103. Caching – effects on performance int main(void) { list<int> s, s2; vector<int> v; for(int i = 0; i < 50000000; ++i){ s.push_back(i); s2.push_back(i); // just to spread elements v.push_back(i); } long ss = 0, sv = 0; for(list<int>::iterator sit = s.begin(); sit != s.end(); ++sit){ ss += *sit; } for(vector<int>::iterator vit = v.begin(); vit != v.end(); ++vit ){ sv2 += *vit; } return 0; } next prev int next prev int next prev int next prev int next prev control list allocator (heap) of list, all list elements allocated one after each other begin middle end vector 1.08 sec (2x slower than before) 0.06 sec (30% faster than before) vector is faster, elements are next to each other in memory, good cache performance. In addition, now we’re using the iterator, which has a better cache performance than using an index into the vector (which btw. introduces an extra multiply). Now the list is even slower (18x), because its elements are not following each other in the heap; there’s an element of list s2 wedged in-between, worsening the cache misses, making the list twice as slow as before
  • 104. stl set vs. vector find comparison 2 6 1 4 3 5 vtemp vector numbers 1-M shuffled 6 4 3 2 1 5 v vector numbers 1-M shuffled (differently) 1 3 2 4 6 5 s set numbers 1-M ordered on RB tree // linear search in vector using vector::find for(vector<X>::iterator vit = vtemp.begin(); vit != vtemp.end(); ++vit){ i = find(v.begin(), v.end(), *vit); } // linear search in vector using iteration begin -> end for(vector<X>::iterator vit = vtemp.begin(); vit != vtemp.end(); ++vit){ for(i = v.begin(); i != v.end(); ++i){ if(*i == *vit) break; } } // O(logn) search on set using set::find for(vector<X>::iterator vit = vtemp.begin(); vit != vtemp.end(); ++vit){ si = s.find(*vit); } This is an overload used by find() for the RAI case. 168 template<typename _RandomAccessIterator, typename _Tp> 169 _RandomAccessIterator 170 __find(_RandomAccessIterator __first, _RandomAccessIterator __last, 171 const _Tp& __val, random_access_iterator_tag) 172 { 173 typename iterator_traits<_RandomAccessIterator>::difference_type 174 __trip_count = (__last - __first) >> 2; 175 176 for (; __trip_count > 0; --__trip_count) 177 { 178 if (*__first == __val) 179 return __first; 180 ++__first; 181 182 if (*__first == __val) 183 return __first; 184 ++__first; 185 186 if (*__first == __val) 187 return __first; 188 ++__first; 189 190 if (*__first == __val) 191 return __first; 192 ++__first; 193 } 194 195 switch (__last - __first) 196 { 197 case 3: 198 if (*__first == __val) 199 return __first; 200 ++__first; 201 case 2: 202 if (*__first == __val) 203 return __first; 204 ++__first; 205 case 1: 206 if (*__first == __val) 207 return __first; 208 ++__first; 209 case 0: 210 default: 211 return __last; 212 } 213 } 214 Note: linear vector::find() is faster than logarithmic set::find() on M < 160 !! loop unrolling in vector::find makes a big difference!!
  • 105. Caching – can you spot the problems? // Example 1 vector<int> vx, vy; // huge vectors, storing x and y coordinates class Rectangle{ public: int xl, yl, xh, yh; Rectangle(size_t xli, size_t yli, size_t xhi, size_t yhi) : xl(vx[xli), yl(vy[yli]), xh(vx[xhi]), yh(vy[yhi]) {} } // example 2 vector<int> v; void foo() { vector<int> w; for( .. ) { temp = v[i] + w[i]; } } // example 3 void bar() { set<int> si; map<int, char> mi; for( .. ){ si.insert(i); mi.insert(make_pair(i, c)); } }
  • 106.
  • 107. Performance C++ : if-less code // median function implementation in STL inline const int & median( const int & a, const int & b, const int & c) { if (a < b) if (b < c) return b; else if (a < c) return c; else return a; else if (a < c) return a; else if (b < c) return c; else return b; } _Z6medianRKiS0_S0_: .LFB2: movl (%rdi), %ecx movl (%rsi), %eax cmpl %eax, %ecx jge .L2 movl (%rdx), %r8d cmpl %r8d, %eax jl .L3 cmpl %r8d, %ecx jge .L5 .L4: movq %rdx, %rsi .L3: movq %rsi, %rax .p2align 4,,1 .p2align 3 ret .p2align 4,,10 .p2align 3 .L2: movl (%rdx), %r8d cmpl %r8d, %ecx .p2align 4,,2 .p2align 3 jl .L5 cmpl %r8d, %eax .p2align 4,,2 .p2align 3 jl .L4 movq %rsi, %rax .p2align 4,,3 .p2align 3 ret .p2align 4,,10 .p2align 3 .L5: movq %rdi, %rsi movq %rsi, %rax .p2align 4,,4 .p2align 3 ret
  • 108. if-less code const int & median( const int & a, const int & b, const int & c) { const bool alb = a < b; const bool blc = b < c; const bool alc = a < c; const int * input[3] = {&a,&b,&c}; unsigned int index = 0; index += (alb & blc) | (!alb & !blc); index += (unsigned int)((alc & !blc) | (!alc & blc)) << 1; return *(input[index]); } __Z6medianRKiS0_S0_: .LFB2: movl (%rsi), %ecx movl (%rdi), %r8d movl (%rdx), %eax movq %rsi, -32(%rsp) movq %rdx, -24(%rsp) movq %rdi, -40(%rsp) cmpl %ecx, %r8d setl %r9b cmpl %eax, %ecx setl %cl cmpl %eax, %r8d movl %r9d, %edx setl %r8b movzbl %cl, %esi xorl $1, %ecx movl %r8d, %eax movzbl %cl, %ecx xorl $1, %edx xorl $1, %eax andl %ecx, %r8d andl %edx, %ecx andl %esi, %eax andl %r9d, %esi orl %r8d, %eax orl %esi, %ecx leal (%rcx,%rax,2), %eax mov %eax, %eax movq -40(%rsp,%rax,8), %rax movl (%rax), %eax ret straight code, no ifs, no conditional jumps, faster!
  • 109. Runtime comparison between stl median and ifless median unsigned int M = 100000000; vector<int> v; for(int i = 0; i < M; ++i){ v.push_back(rand()); } for(size_t i = 2; i < M; ++i){ ss1 += stl_median(v[i-2], v[i-1], v[i]); } for(size_t i = 2; i < M; ++i){ ss1 += median(v[i-2], v[i-1], v[i]); } for(size_t i = 2; i < M; ++i){ ss1 += stl_median(v[i-2], v[i-1], v[i++]); … ss1 += stl_median(v[i-2], v[i-1], v[i++]); // 8 – way loop unrolling } for(size_t i = 2; i < M; ++i){ ss1 += median(v[i-2], v[i-1], v[i++]); … ss1 += median(v[i-2], v[i-1], v[i++]); // 8 – way loop unrolling } ts = total time in for loop for stl median; tg = total time in for loop for ifless median; ts8 = total time in for loop for stl median with 8-way loop unrolling; tg8 = total time in for loop for ifless median with 8-way loop unrolling. Measured: tg/ts = 0.82, tg8/ts8 = 0.66. With O denoting the loop overhead per iteration (presumably; it is divided by 8 in the unrolled case): $\frac{\mathrm{median} + O}{\mathrm{stl\_median} + O} = 0.82$ and $\frac{\mathrm{median} + O/8}{\mathrm{stl\_median} + O/8} = 0.66$, which gives $\mathrm{median}/\mathrm{stl\_median} = 0.63$: ifless median is 37% faster than stl median!
  • 110.