google-snappy   
     (machy)
google-snappy                          
Snappy is a compression/decompression library. It does not aim for maximum
compression, or compatibility with any other compression library; instead,
it aims for very high speeds and reasonable compression. For instance,
compared to the fastest mode of zlib, Snappy is an order of magnitude faster
for most inputs, but the resulting compressed files are anywhere from 20% to
100% bigger. (For more information, see "Performance", below.)

                                                   README                      

•  snappy           /                          snappy


                                        zlib
                    20    100%
      1                                               ”Performance”
•                               1.0.3
• 
   http://code.google.com/p/snappy/
•  google-gflags




•  google-gflags, google-snappy

     WARNING: Compiled with assertions enabled, will be slow.

     ./configure CXXFLAGS="-g -O2 -DNDEBUG" --with-gflags
     --with-gflags       gflags
     configure                       gflags
snappy_unittest                             
•                        snappy_unittest

• 
     ./snappy_unittest
• 
     ./snappy_unittest -run_microbenchmarks=false -write_compressed aaa.txt
                 aaa.txt.comp
• 
   ./snappy_unittest -run_microbenchmarks=false -write_uncompressed aaa.txt.comp
                          aaa.txt.comp.uncomp
•  zlib
   ./snappy_unittest -run_microbenchmarks=false -zlib testdata/*
•  snappy




                      
                    
                
            alice29.txt
                              
   149KB
            html_x_4
      400KB   HTML
                  400KB
             urls.10K
     1       URL     
              686KB
      baddata1.snappy
                         (?)
       27KB
            house.jpg
             
                      124KB
zlib                                                         
                  
                   
         
                
          
                      snappy
              99.1 MB/s
    293.3 MB/s
   59.8 %
   alice29.txt
       zlib fastest
        20.7 MB/s
     81.5 MB/s
   42.8 %
                      zlib default
         6.6 MB/s
     90.4 MB/s
   35.8 %
                      snappy
             230.2 MB/s
    557.9 MB/s
   23.6 %
   html_x_4
          zlib fastest
        45.6 MB/s
    154.5 MB/s
   16.5 %
                      zlib default
       20.7 MB/s
    177.7 MB/s
   13.0 %
                      snappy
             132.6 MB/s
    411.2 MB/s
   50.9 %
    urls.10K
         zlib fastest
        24.7 MB/s
     94.8 MB/s
   36.1 %
                      zlib default
        12.2 MB/s
    102.4 MB/s
   31.7 %
                      snappy
             137.5 MB/s    1068.7 MB/s
   97.0 %
baddata1.snappy
      zlib fastest
        12.3 MB/s
     57.0 MB/s
   84.1 %
                      zlib default
        10.8 MB/s
     58.9 MB/s
   83.4 %
                      snappy
             933.7 MB/s
   7271.6 MB/s
   99.9 %
   house.jpg
         zlib fastest
        11.9 MB/s
     89.6 MB/s
   99.6 %
                      zlib default
        11.5 MB/s
    122.4 MB/s
   99.6 %
zlib                                                               
                  
                   
             
                  
          
                      snappy
                  99.1 MB/s
      293.3 MB/s
   59.8 %
   alice29.txt
       zlib fastest
            20.7 MB/s
       81.5 MB/s
   42.8 %
                      zlib default
             6.6 MB/s
       90.4 MB/s
   35.8 %
                      snappy
                230.2 MB/s
       557.9 MB/s
   23.6 %
   html_x_4
          zlib fastest
            45.6 MB/s
      154.5 MB/s
   16.5 %
                      zlib default
           20.7 MB/s
      177.7 MB/s
   13.0 %
                      snappy
                132.6 MB/s
       411.2 MB/s
   50.9 %
    urls.10K
         zlib fastest
            24.7 MB/s
       94.8 MB/s
   36.1 %
                      zlib default
            12.2 MB/s
      102.4 MB/s
   31.7 %
                      snappy
                137.5 MB/s      1068.7 MB/s
    97.0 %
baddata1.snappy
      zlib fastest
            12.3 MB/s
       57.0 MB/s
   84.1 %
                                              zlib fastest(level=1) 5
                      zlib default
                                           3.5    10.8 MB/s
        58.9 MB/s
   83.4 %
                      snappy
             1.2 933.7 MB/s
                                                1.4           7271.6 MB/s
                                                                      
      99.9 %
   house.jpg
         zlib fastest
            11.9 MB/s
       89.6 MB/s
   99.6 %
                      zlib default
            11.5 MB/s
      122.4 MB/s
   99.6 %
zlib                                                             
                  
                   
         
                    
          
                      snappy
              99.1 MB/s
        293.3 MB/s
   59.8 %
   alice29.txt
       zlib fastest
        20.7 MB/s
         81.5 MB/s
   42.8 %
                      zlib default
         6.6 MB/s
         90.4 MB/s
   35.8 %
                      snappy
             230.2 MB/s
        557.9 MB/s
   23.6 %
                                           snappy
   html_x_4
          zlib fastest
        45.6 MB/s
        154.5 MB/s
   16.5 %
                      zlib default
       20.7 MB/s
   
    177.7 MB/s
   13.0 %
                      snappy
             132.6 MB/s
        411.2 MB/s
   50.9 %
    urls.10K
         zlib fastest
        24.7 MB/s
         94.8 MB/s
   36.1 %
                      zlib default
        12.2 MB/s
        102.4 MB/s
   31.7 %
                      snappy
             137.5 MB/s        1068.7 MB/s
   97.0 %
baddata1.snappy
      zlib fastest
        12.3 MB/s
         57.0 MB/s
   84.1 %
                      zlib default
        10.8 MB/s
         58.9 MB/s
   83.4 %
                      snappy
             933.7 MB/s
       7271.6 MB/s
   99.9 %
   house.jpg
         zlib fastest
        11.9 MB/s
         89.6 MB/s
   99.6 %
                      zlib default
        11.5 MB/s
        122.4 MB/s
   99.6 %
lzo                                                             
                  
                 
               
                
           
                      snappy
                  85.9 MB/s
    259.7 MB/s
    59.8 %
   alice29.txt
                      lzo
                     90.6 MB/s
    178.2 MB/s
    57.8 %
                      snappy
              206.7 MB/s
       463.1 MB/s
    23.6 %
   html_x_4
                      lzo
                 203.3 MB/s
       421.6 MB/s
    21.8 %
                      snappy
              119.4 MB/s
       363.2 MB/s
    50.9 %
    urls.10K
                      lzo
                 125.3 MB/s
       308.5 MB/s
    49.3 %
                      snappy
              109.6 MB/s       1048.1 MB/s
    97.0 %
baddata1.snappy
                      lzo
                 353.4 MB/s
      2267.1 MB/s
   100.4 %
                      snappy
              846.4 MB/s
      6642.0 MB/s
    99.9 %
   house.jpg
                      lzo
                 672.6 MB/s
      2024.4 MB/s
   100.3 %


                                hadoop               lzo
           snappy                                           snappy
                                         lzo
snappy       zlib       



zlib(deflate)

         
          
               
   




snappy
government_of_the_people,_by_the_people,_for_the_people



government_of_the_people,_by[15,13]for[16,11]

                             15        13   

HAHAHAHAHA...


HA[2,8]...
18bit




15bit
snappy             
                           LITERAL
                                                 




         LITERAL       

   
        

                             11byte

                           64byte
           4byte   
                                       64byte
snappy             
                           LITERAL
                                                 




            Byte                       
         LITERAL       

   
        

                             11byte

                           64byte
           4byte   
                                       64byte
snappy                                                      
               16KB      fragment             



                                          




      fragment (16KB)
              fragment (16KB)
   fragment



fragment
snappy                                                          2 
             (     8192        )            byte                            

4byte                                                     

government_of_the_people,_by_the_people,_for_the_people
                  13 : Hash(“f_th”)=7
             

        0
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
        6
       0
 4
             1
 13
              5
 8
 10
 11
   3

government_of_the_people,_by_the_people,_for_the_people
                   14 : Hash(“_the”)=9
            

        0
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
        6
       0
 4
             1
 13
     14
 5
 8
 10
 11
        3
snappy                                                    3 
                                                   

government_of_the_people,_by_the_people,_for_the_people

                                  29 : Hash(“_the”)=9
      0
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
       6
 23
 0
 4
 27
 26
 22
 13
 24
 14
 25
 15
 10
 28
 16
 20
     =13
government_of_the_people,_by_the_people,_for_the_people

[government_of_the_people,_by][15,13]
                                                    32
            
government_of_the_people,_by_the_people,_for_the_people
static inline uint32 HashBytes(uint32 bytes, int shift) {
   uint32 kMul = 0x1e35a7bd;
   return (bytes * kMul) >> shift;
 }
static inline uint32 HashBytes(uint32 bytes, int shift) {
   uint32 kMul = 0x1e35a7bd;
   return (bytes * kMul) >> shift;
 }

                   4byte
    4byte                         32bit
                         CPU                               
    #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
    #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
    #else
    inline uint32 UNALIGNED_LOAD32(const void *p) {
      uint32 t;
      memcpy(&t, p, sizeof t);
      return t;
    }
static inline uint32 HashBytes(uint32 bytes, int shift) {
   uint32 kMul = 0x1e35a7bd;
   return (bytes * kMul) >> shift;
 }
shift
               8192     (2   13   )       32-13=19
              8192
JPEG

(~5% performance, ~0.1% density)       

...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz...


...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz...
                       32                  2       

...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz...


...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz...
                 32                    1           
16KB                               1008
•              4byte

• 


•        CPU

• 
• 



     (   zlib   deflateBound(),compressBound()
                         )
•  snappy


• 
•  16KB     fragment
                   CPU

•                             (zlib
                         )

Snappy survey

  • 1.
  • 2.
    google-snappy Snappy is a compression/decompression library. It does not aim for maximum compression, or compatibility with any other compression library; instead, it aims for very high speeds and reasonable compression. For instance, compared to the fastest mode of zlib, Snappy is an order of magnitude faster for most inputs, but the resulting compressed files are anywhere from 20% to 100% bigger. (For more information, see "Performance", below.) README •  snappy / snappy zlib 20 100% 1 ”Performance”
  • 3.
    •  1.0.3 •  http://code.google.com/p/snappy/ •  google-gflags •  google-gflags, google-snappy WARNING: Compiled with assertions enabled, will be slow. ./configure CXXFLAGS="-g -O2 -DNDEBUG" --with-gflags --with-gflags gflags configure gflags
  • 4.
    snappy_unittest •  snappy_unittest •  ./snappy_unittest •  ./snappy_unittest -run_microbenchmarks=false -write_compressed aaa.txt aaa.txt.comp •  ./snappy_unittest -run_microbenchmarks=false -write_uncompressed aaa.txt.comp aaa.txt.comp.uncomp •  zlib ./snappy_unittest -run_microbenchmarks=false -zlib testdata/*
  • 5.
    •  snappy alice29.txt 149KB html_x_4 400KB HTML 400KB urls.10K 1 URL 686KB baddata1.snappy (?) 27KB house.jpg 124KB
  • 6.
    zlib snappy 99.1 MB/s 293.3 MB/s 59.8 % alice29.txt zlib fastest 20.7 MB/s 81.5 MB/s 42.8 % zlib default 6.6 MB/s 90.4 MB/s 35.8 % snappy 230.2 MB/s 557.9 MB/s 23.6 % html_x_4 zlib fastest 45.6 MB/s 154.5 MB/s 16.5 % zlib default 20.7 MB /s 177.7 MB/s 13.0 % snappy 132.6 MB/s 411.2 MB/s 50.9 % urls.10K zlib fastest 24.7 MB/s 94.8 MB/s 36.1 % zlib default 12.2 MB/s 102.4 MB/s 31.7 % snappy 137.5 MB/s 1068.7 MB/s 97.0 % baddata1.snappy zlib fastest 12.3 MB/s 57.0 MB/s 84.1 % zlib default 10.8 MB/s 58.9 MB/s 83.4 % snappy 933.7 MB/s 7271.6 MB/s 99.9 % house.jpg zlib fastest 11.9 MB/s 89.6 MB/s 99.6 % zlib default 11.5 MB/s 122.4 MB/s 99.6 %
  • 7.
    zlib snappy 99.1 MB/s 293.3 MB/s 59.8 % alice29.txt zlib fastest 20.7 MB/s 81.5 MB/s 42.8 % zlib default 6.6 MB/s 90.4 MB/s 35.8 % snappy 230.2 MB/s 557.9 MB/s 23.6 % html_x_4 zlib fastest 45.6 MB/s 154.5 MB/s 16.5 % zlib default 20.7 MB /s 177.7 MB/s 13.0 % snappy 132.6 MB/s 411.2 MB/s 50.9 % urls.10K zlib fastest 24.7 MB/s 94.8 MB/s 36.1 % zlib default 12.2 MB/s 102.4 MB/s 31.7 % snappy 137.5 MB/s 1068.7 MB/s 97.0 % baddata1.snappy zlib fastest 12.3 MB/s 57.0 MB/s 84.1 % zlib fastest(level=1) 5 zlib default 3.510.8 MB/s 58.9 MB/s 83.4 % snappy 1.2 933.7 MB/s 1.4 7271.6 MB/s 99.9 % house.jpg zlib fastest 11.9 MB/s 89.6 MB/s 99.6 % zlib default 11.5 MB/s 122.4 MB/s 99.6 %
  • 8.
    zlib snappy 99.1 MB/s 293.3 MB/s 59.8 % alice29.txt zlib fastest 20.7 MB/s 81.5 MB/s 42.8 % zlib default 6.6 MB/s 90.4 MB/s 35.8 % snappy 230.2 MB/s 557.9 MB/s 23.6 % snappy html_x_4 zlib fastest 45.6 MB/s 154.5 MB/s 16.5 % zlib default 20.7 MB /s 177.7 MB/s 13.0 % snappy 132.6 MB/s 411.2 MB/s 50.9 % urls.10K zlib fastest 24.7 MB/s 94.8 MB/s 36.1 % zlib default 12.2 MB/s 102.4 MB/s 31.7 % snappy 137.5 MB/s 1068.7 MB/s 97.0 % baddata1.snappy zlib fastest 12.3 MB/s 57.0 MB/s 84.1 % zlib default 10.8 MB/s 58.9 MB/s 83.4 % snappy 933.7 MB/s 7271.6 MB/s 99.9 % house.jpg zlib fastest 11.9 MB/s 89.6 MB/s 99.6 % zlib default 11.5 MB/s 122.4 MB/s 99.6 %
  • 9.
    lzo snappy 85.9 MB/s 259.7 MB/s 59.8 % alice29.txt lzo 90.6 MB/s 178.2 MB/s 57.8 % snappy 206.7 MB/s 463.1 MB/s 23.6 % html_x_4 lzo 203.3 MB/s 421.6 MB/s 21.8 % snappy 119.4 MB/s 363.2 MB/s 50.9 % urls.10K lzo 125.3 MB/s 308.5 MB/s 49.3 % snappy 109.6 MB/s 1048.1 MB/s 97.0 % baddata1.snappy lzo 353.4 MB/s 2267.1 MB/s 100.4 % snappy 846.4 MB/s 6642.0 MB/s 99.9 % house.jpg lzo 672.6 MB/s 2024.4 MB/s 100.3 % hadoop lzo snappy snappy lzo
  • 10.
    snappy zlib zlib(deflate) snappy
  • 11.
  • 12.
  • 13.
    snappy LITERAL LITERAL 11byte 64byte 4byte 64byte
  • 14.
    snappy LITERAL Byte LITERAL 11byte 64byte 4byte 64byte
  • 15.
    snappy 16KB fragment fragment (16KB) fragment (16KB) fragment fragment
  • 16.
    snappy 2 ( 8192 ) byte 4byte government_of_the_people,_by_the_people,_for_the_people 13 : Hash(“f_th”)=7 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 6 0 4 1 13 5 8 10 11 3 government_of_the_people,_by_the_people,_for_the_people 14 : Hash(“_the”)=9 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 6 0 4 1 13 14 5 8 10 11 3
  • 17.
    snappy 3 government_of_the_people,_by_the_people,_for_the_people 29 : Hash(“_the”)=9 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 6 23 0 4 27 26 22 13 24 14 25 15 10 28 16 20 =13 government_of_the_people,_by_the_people,_for_the_people [government_of_the_people,_by][15,13] 32 government_of_the_people,_by_the_people,_for_the_people
  • 18.
    static inline uint32 HashBytes(uint32 bytes, int shift) { uint32 kMul = 0x1e35a7bd; return (bytes * kMul) >> shift; }
  • 19.
    static inline uint32 HashBytes(uint32 bytes, int shift) { uint32 kMul = 0x1e35a7bd; return (bytes * kMul) >> shift; } 4byte 4byte 32bit CPU #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p)) #else inline uint32 UNALIGNED_LOAD32(const void *p) { uint32 t; memcpy(&t, p, sizeof t); return t; }
  • 20.
    static inline uint32 HashBytes(uint32 bytes, int shift) { uint32 kMul = 0x1e35a7bd; return (bytes * kMul) >> shift; } shift 8192 (2 13 ) 32-13=19 8192
  • 21.
    JPEG (~5% performance, ~0.1% density) ...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz... ...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz... 32 2 ...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz... ...a93ecm2k39cn10xi10chakegueks16krpqw2453maheggubz... 32 1 16KB 1008
  • 22.
    •  4byte •  •  CPU • 
  • 23.
    •  ( zlib deflateBound(),compressBound() )
  • 24.
    •  snappy •  •  16KB fragment CPU •  (zlib )