HBASE INTRODUCTION &
DEVELOPMENT
The term planet-size web application comes to mind, and in this case it is fitting
WHAT IS IT?
• It is the Hadoop database,
• Scalable
• Distributed
• BigDatastore
• Column Oriented
HBASE
HDFS
Reader
Writer
FEATURES OF HBASE
• Scalable.
• Automatic failover
• Consistent reads and writes.
• Sharding of tables
• Failover support
• Classes for backing Hadoop map reduce
jobs
• Java API for client access
• Thrift gateway and a REST Web
WHAT IT IS NOT
•No-Sql
•No relation
•No joins
•Not a replacement of RDBMS
• NoSQL
• HBase is a type of "NoSQL" database. "NoSQL" is a general term meaning that the
database isn't an RDBMS which supports SQL as its primary access language.
• When we should think of using it
• HBase isn't suitable for every problem. We should have a lot of data; if the data
volume is small, an RDBMS is the better choice.
• Difference Between HDFS and HBase
• HDFS is a distributed file system that is well suited for the storage of large files. Its
documentation states that it is not, however, a general purpose file system, and does not
provide fast individual record lookups in files. HBase, on the other hand, is built on top of
HDFS and provides fast record lookups (and updates) for large tables.
THINK ON
THIS
• Facebook, for example, is adding more than 15 TB of data daily and processing it.
• Google adding Peta-Bytes of data and processing.
• Companies store logs, temperature readings, and many other kinds of data
to store and process, which run into petabytes — volumes for which conventional
technologies would take days just to read the data, let alone process it.
WHAT IS COLUMNS ORIENTED
MEANS
• Grouped by columns,
• The reason to store values on a per-column basis
instead is based on the assumption
• that, for specific queries, not all of the values are
needed.
• Reduced I/O
COMPONENTS
HMASTER
• Master server is responsible for
monitoring all RegionServer instances in
the cluster, and is the interface for all
metadata changes, it runs on the server
which hosts namenode.
• Master controls critical functions such as
RegionServer failover and completing
region splits. So while the cluster can still
run for a time without the Master, the
Master should be restarted as soon as
ZOOKEEP
ER
• Zookeeper is an open source software
providing a highly reliable, distributed
coordination service
• Entry point for an HBase system
• It includes tracking of region servers,
where the root region is hosted
API
• Interface to HBase
• Using these we can access HBase and perform
read/write and other operations on HBase.
• REST, Thrift, and Avro
• Thrift API framework, for scalable cross-language
services development, combines a software stack with a
code generation engine to build services that work
efficiently and seamlessly between C++, Java, Python,
PHP, Ruby, Erlang, Perl, Haskell, C#, Cocoa, JavaScript,
Node.js, Smalltalk, OCaml and Delphi and other
languages.
• lib
• commons-configuration-1.8.jar
• commons-lang-2.6.jar
• commons-logging-1.1.1.jar
• hadoop-core-1.0.0.jar
• hbase-0.92.1.jar
• log4j-1.2.16.jar
• slf4j-api-1.5.8.jar
• slf4j-log4j12-1.5.8.jar
• zookeeper-3.4.3.jar
• import org.apache.hadoop.conf.Configuration;
• import org.apache.hadoop.hbase.HBaseConfiguration;
• import org.apache.hadoop.hbase.HColumnDescriptor;
• import org.apache.hadoop.hbase.HTableDescriptor;
• import org.apache.hadoop.hbase.KeyValue;
• import org.apache.hadoop.hbase.MasterNotRunningException;
• import org.apache.hadoop.hbase.ZooKeeperConnectionException;
• import org.apache.hadoop.hbase.client.Delete;
• import org.apache.hadoop.hbase.client.Get;
• import org.apache.hadoop.hbase.client.HBaseAdmin;
• import org.apache.hadoop.hbase.client.HTable;
• import org.apache.hadoop.hbase.client.Result;
• import org.apache.hadoop.hbase.client.ResultScanner;
• import org.apache.hadoop.hbase.client.Scan;
• import org.apache.hadoop.hbase.client.Put;
• import org.apache.hadoop.hbase.util.Bytes;
• Configuration hConf = HBaseConfiguration.create(conf);
hConf.set(Constants.HBASE_CONFIGURATION_ZOOKEEPER_QUO
RUM, hbaseZookeeperQuorum);
hConf.set(Constants.HBASE_CONFIGURATION_ZOOKEEPER_CLIE
NTPORT, hbaseZookeeperClientPort); HTable hTable = new
HTable(hConf, tableName);
• public class HBaseTest {
• private static Configuration conf = null;
• /**
• * Initialization
• */
• static {
• conf = HBaseConfiguration.create();
• }
• }
• /** Create a table
• */
• public static void creatTable(String tableName, String[] familys)
• throws Exception { HBaseAdmin admin = new
HBaseAdmin(conf);
• if (admin.tableExists(tableName)) { System.out.println("table
already exists!");
• } else { HTableDescriptor tableDesc = new
HTableDescriptor(tableName);
• for (int i = 0; i < familys.length; i++) {
• tableDesc.addFamily(new
HColumnDescriptor(familys[i]));
• }admin.createTable(tableDesc);
• System.out.println("create table " + tableName + " ok.");
• }
• /** * Delete a table
• */
• public static void deleteTable(String tableName) throws Exception {
• try {HBaseAdmin admin = new HBaseAdmin(conf);
• admin.disableTable(tableName);
• admin.deleteTable(tableName);
• System.out.println("delete table " + tableName + " ok.");
• } catch (MasterNotRunningException e) {
• e.printStackTrace();
• } catch (ZooKeeperConnectionException e) {
• e.printStackTrace();
• }
• }
• /**
• * Put (or insert) a row
• */
• public static void addRecord(String tableName, String rowKey,
String family, String qualifier, String value) throws Exception {
• try {HTable table = new HTable(conf, tableName);
• Put put = new Put(Bytes.toBytes(rowKey));
• put.add(Bytes.toBytes(family), Bytes.toBytes(qualifier),
Bytes
• .toBytes(value));
• table.put(put);
• System.out.println("insert recored " + rowKey + " to
table "
• + tableName + " ok.");
• } catch (IOException e) {e.printStackTrace();
• /**
• * Delete a row
• */
• public static void delRecord(String tableName, String rowKey)
• throws IOException {HTable table = new HTable(conf,
tableName);
• List<Delete> list = new ArrayList<Delete>();
• Delete del = new Delete(rowKey.getBytes());
• list.add(del);
• table.delete(list);
• System.out.println("del recored " + rowKey + " ok.");
• }
• /**Get a row
• */
• public static void getOneRecord (String tableName, String rowKey)
throws IOException{
• HTable table = new HTable(conf, tableName);
• Get get = new Get(rowKey.getBytes());
• Result rs = table.get(get);
• for(KeyValue kv : rs.raw()){
• System.out.print(new String(kv.getRow()) + " " );
• System.out.print(new String(kv.getFamily()) + ":" );
• System.out.print(new String(kv.getQualifier()) + " " );
• System.out.print(kv.getTimestamp() + " " );
• System.out.println(new String(kv.getValue()));
• }
• }
• /** Scan (or list) a table */
• public static void getAllRecord (String tableName) {
• try{
• HTable table = new HTable(conf, tableName);
• Scan s = new Scan();
• ResultScanner ss = table.getScanner(s);
• for(Result r:ss){
• for(KeyValue kv : r.raw()){
• System.out.print(new String(kv.getRow()) + " ");
• System.out.print(new String(kv.getFamily()) + ":"); System.out.print(new
String(kv.getQualifier()) + " ");
• System.out.print(kv.getTimestamp() + " "); System.out.println(new
String(kv.getValue())); } }
• } catch (IOException e){
• e.printStackTrace();
• }
• }
• public static void main(String[] agrs) {
• try {
• String tablename = "scores";
• String[] familys = { "grade", "course" };
• HBaseTest.creatTable(tablename, familys);
• // add record zkb
• HBaseTest.addRecord(tablename, "zkb", "grade", "", "5");
• HBaseTest.addRecord(tablename, "zkb", "course", "", "90");
• HBaseTest.addRecord(tablename, "zkb", "course", "math", "97");
• HBaseTest.addRecord(tablename, "zkb", "course", "art", "87");
• // add record baoniu
• HBaseTest.addRecord(tablename, "baoniu", "grade", "", "4");
• HBaseTest.addRecord(tablename, "baoniu", "course", "math", "89");
•
• System.out.println("===========get one record========");
• HBaseTest.getOneRecord(tablename, "zkb");
• System.out.println("===========show all record========");
• HBaseTest.getAllRecord(tablename);
• System.out.println("===========del one record========");
• HBaseTest.delRecord(tablename, "baoniu");
• HBaseTest.getAllRecord(tablename);
• System.out.println("===========show all record========");
• HBaseTest.getAllRecord(tablename);
• } catch (Exception e) {
• e.printStackTrace();
• }
• }}
Gmail dwivedishashwat@gmail.com
Twitter shashwat_2010
Facebook shriparv@gmail.com
Skype shriparv

H base introduction & development

  • 1.
    HBASE INTRODUCTION & DEVELOPMENT The term planet-size web application comes to mind, and in this case it is fitting
  • 2.
    WHAT IS IT? • It is the Hadoop database, • Scalable • Distributed • Big Data store • Column Oriented HBASE HDFS Reader Writer
  • 3.
    FEATURES OF HBASE •Scalable. • Automatic failover • Consistent reads and writes. • Sharding of tables • Failover support • Classes for backing Hadoop map reduce jobs • Java API for client access • Thrift gateway and a REST Web
  • 4.
    WHAT IT ISNOT •No-Sql •No relation •No joins •Not a replacement of RDBMS
  • 5.
    • NoSQL • HBaseis a type of "NoSQL" database. "NoSQL" is a general term meaning that the database isn't an RDBMS which supports SQL as its primary access language. • When we should think of using it • HBase isn't suitable for every problem. We should have lot of data, if data is less RDBMS is better. • Difference Between HDFS and HBase • HDFS is a distributed file system that is well suited for the storage of large files. It's documentation states that it is not, however, a general purpose file system, and does not provide fast individual record lookups in files. HBase, on the other hand, is built on top of HDFS and provides fast record lookups (and updates) for large tables.
  • 6.
    THINK ON THIS • Facebook,for example, is adding more than 15 TB, and processing daily • Google adding Peta-Bytes of data and processing. • Companies storing Logs, temperature details, and many other prospective to store and process, which come in Peta-byte for which conventional technologies will days to read the data forget about processing it.
  • 7.
    WHAT IS COLUMNSORIENTED MEANS • Grouped by columns, • The reason to store values on a per-column basis instead is based on the assumption • that, for specific queries, not all of the values are needed. • Reduced I/O
  • 8.
  • 9.
    HMASTER • Master serveris responsible for monitoring all RegionServer instances in the cluster, and is the interface for all metadata changes, it runs on the server which hosts namenode. • Master controls critical functions such as RegionServer failover and completing region splits. So while the cluster can still run for a time without the Master, the Master should be restarted as soon as
  • 10.
    ZOOKEEP ER • Zookeeper isan open source software providing a highly reliable, distributed coordination service • Entry point for an HBase system • It includes tracking of region servers, where the root region is hosted
  • 11.
    API • Interface toHBase • Using these we can we can access HBase and perform read/write and other operation on HBase. • REST, Thrift, and Avro • Thrift API framework, for scalable cross-language services development, combines a software stack with a code generation engine to build services that work efficiently and seamlessly between C++, Java, Python, PHP, Ruby, Erlang, Perl, Haskell, C#, Cocoa, JavaScript, Node.js, Smalltalk, OCaml and Delphi and other languages.
  • 12.
    • lib • commons-configuration-1.8.jar •commons-lang-2.6.jar • commons-logging-1.1.1.jar • hadoop-core-1.0.0.jar • hbase-0.92.1.jar • log4j-1.2.16.jar • slf4j-api-1.5.8.jar • slf4j-log4j12-1.5.8.jar • zookeeper-3.4.3.jar
  • 13.
    • import org.apache.hadoop.conf.Configuration; •import org.apache.hadoop.hbase.HBaseConfiguration; • import org.apache.hadoop.hbase.HColumnDescriptor; • import org.apache.hadoop.hbase.HTableDescriptor; • import org.apache.hadoop.hbase.KeyValue; • import org.apache.hadoop.hbase.MasterNotRunningException; • import org.apache.hadoop.hbase.ZooKeeperConnectionException; • import org.apache.hadoop.hbase.client.Delete; • import org.apache.hadoop.hbase.client.Get; • import org.apache.hadoop.hbase.client.HBaseAdmin; • import org.apache.hadoop.hbase.client.HTable; • import org.apache.hadoop.hbase.client.Result; • import org.apache.hadoop.hbase.client.ResultScanner; • import org.apache.hadoop.hbase.client.Scan; • import org.apache.hadoop.hbase.client.Put; • import org.apache.hadoop.hbase.util.Bytes;
  • 14.
    • Configuration hConf= HBaseConfiguration.create(conf); hConf.set(Constants.HBASE_CONFIGURATION_ZOOKEEPER_QUO RUM, hbaseZookeeperQuorum); hConf.set(Constants.HBASE_CONFIGURATION_ZOOKEEPER_CLIE NTPORT, hbaseZookeeperClientPort); HTable hTable = new HTable(hConf, tableName);
  • 15.
    • public classHBaseTest { • private static Configuration conf = null; • /** • * Initialization • */ • static { • conf = HBaseConfiguration.create(); • } • }
  • 16.
    • /** Createa table • */ • public static void creatTable(String tableName, String[] familys) • throws Exception { HBaseAdmin admin = new HBaseAdmin(conf); • if (admin.tableExists(tableName)) { System.out.println("table already exists!"); • } else { HTableDescriptor tableDesc = new HTableDescriptor(tableName); • for (int i = 0; i < familys.length; i++) { • tableDesc.addFamily(new HColumnDescriptor(familys[i])); • }admin.createTable(tableDesc); • System.out.println("create table " + tableName + " ok."); • }
  • 17.
    • /** *Delete a table • */ • public static void deleteTable(String tableName) throws Exception { • try {HBaseAdmin admin = new HBaseAdmin(conf); • admin.disableTable(tableName); • admin.deleteTable(tableName); • System.out.println("delete table " + tableName + " ok."); • } catch (MasterNotRunningException e) { • e.printStackTrace(); • } catch (ZooKeeperConnectionException e) { • e.printStackTrace(); • } • }
  • 18.
    • /** • *Put (or insert) a row • */ • public static void addRecord(String tableName, String rowKey, String family, String qualifier, String value) throws Exception { • try {HTable table = new HTable(conf, tableName); • Put put = new Put(Bytes.toBytes(rowKey)); • put.add(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes • .toBytes(value)); • table.put(put); • System.out.println("insert recored " + rowKey + " to table " • + tableName + " ok."); • } catch (IOException e) {e.printStackTrace();
  • 19.
    • /** • *Delete a row • */ • public static void delRecord(String tableName, String rowKey) • throws IOException {HTable table = new HTable(conf, tableName); • List<Delete> list = new ArrayList<Delete>(); • Delete del = new Delete(rowKey.getBytes()); • list.add(del); • table.delete(list); • System.out.println("del recored " + rowKey + " ok."); • }
  • 20.
    • /**Get arow • */ • public static void getOneRecord (String tableName, String rowKey) throws IOException{ • HTable table = new HTable(conf, tableName); • Get get = new Get(rowKey.getBytes()); • Result rs = table.get(get); • for(KeyValue kv : rs.raw()){ • System.out.print(new String(kv.getRow()) + " " ); • System.out.print(new String(kv.getFamily()) + ":" ); • System.out.print(new String(kv.getQualifier()) + " " ); • System.out.print(kv.getTimestamp() + " " ); • System.out.println(new String(kv.getValue())); • } • }
  • 21.
    • /** Scan(or list) a table */ • public static void getAllRecord (String tableName) { • try{ • HTable table = new HTable(conf, tableName); • Scan s = new Scan(); • ResultScanner ss = table.getScanner(s); • for(Result r:ss){ • for(KeyValue kv : r.raw()){ • System.out.print(new String(kv.getRow()) + " "); • System.out.print(new String(kv.getFamily()) + ":"); System.out.print(new String(kv.getQualifier()) + " "); • System.out.print(kv.getTimestamp() + " "); System.out.println(new String(kv.getValue())); } } • } catch (IOException e){ • e.printStackTrace(); • } • }
  • 22.
    • public staticvoid main(String[] agrs) { • try { • String tablename = "scores"; • String[] familys = { "grade", "course" }; • HBaseTest.creatTable(tablename, familys); • // add record zkb • HBaseTest.addRecord(tablename, "zkb", "grade", "", "5"); • HBaseTest.addRecord(tablename, "zkb", "course", "", "90"); • HBaseTest.addRecord(tablename, "zkb", "course", "math", "97"); • HBaseTest.addRecord(tablename, "zkb", "course", "art", "87"); • // add record baoniu • HBaseTest.addRecord(tablename, "baoniu", "grade", "", "4"); • HBaseTest.addRecord(tablename, "baoniu", "course", "math", "89"); •
  • 23.
    • System.out.println("===========get onerecord========"); • HBaseTest.getOneRecord(tablename, "zkb"); • System.out.println("===========show all record========"); • HBaseTest.getAllRecord(tablename); • System.out.println("===========del one record========"); • HBaseTest.delRecord(tablename, "baoniu"); • HBaseTest.getAllRecord(tablename); • System.out.println("===========show all record========"); • HBaseTest.getAllRecord(tablename); • } catch (Exception e) { • e.printStackTrace(); • } • }}
  • 24.