Friday, August 16, 2019

HBase Example

Let's look at an HBase example that imports data from a file into an HBase table.

Use Case

We have to import the data present in a file into an HBase table, creating the table through the Java API.
Data_file.txt contains the following data:

  1. 1,India,Bihar,Champaran,2009,April,P1,1,5  
  2. 2,India, Bihar,Patna,2009,May,P1,2,10  
  3. 3,India, Bihar,Bhagalpur,2010,June,P2,3,15  
  4. 4,United States,California,Fresno,2009,April,P2,2,5  
  5. 5,United States,California,Long Beach,2010,July,P2,4,10  
  6. 6,United States,California,San Francisco,2011,August,P1,6,20  
The Java code that loads this data is shown in the "Program Code" section below.
This data has to be loaded into a new HBase table that is created through the Java API. The following column families have to be created:
  1. sample, region, time, product, sale, profit
Column family region has three column qualifiers: country, state, city
Column family time has two column qualifiers: year, month

Jar Files

Make sure that the following jars are on the classpath when writing the code, as they are required by HBase.
  1. commons-logging-1.0.4
  2. commons-logging-api-1.0.4
  3. hadoop-core-0.20.2-cdh3u2
  4. hbase-0.90.4-cdh3u2
  5. log4j-1.2.15
  6. zookeeper-3.3.3-cdh3u0

Program Code

  1. import java.io.BufferedReader;  
  2. import java.io.File;  
  3. import java.io.FileReader;  
  4. import java.io.IOException;  
  5. import java.util.StringTokenizer;  
  6.   
  7. import org.apache.hadoop.conf.Configuration;  
  8. import org.apache.hadoop.hbase.HBaseConfiguration;  
  9. import org.apache.hadoop.hbase.HColumnDescriptor;  
  10. import org.apache.hadoop.hbase.HTableDescriptor;  
  11. import org.apache.hadoop.hbase.client.HBaseAdmin;  
  12. import org.apache.hadoop.hbase.client.HTable;  
  13. import org.apache.hadoop.hbase.client.Put;  
  14. import org.apache.hadoop.hbase.util.Bytes;  
  15.   
  16.   
  17. public class readFromFile {  
  18.     public static void main(String[] args) throws IOException{  
  19.         if(args.length==1)  
  20.             {  
  21.             Configuration conf = HBaseConfiguration.create(new Configuration());  
  22.             HBaseAdmin hba = new HBaseAdmin(conf);  
  23.             if(!hba.tableExists(args[0])){  
  24.                 HTableDescriptor ht = new HTableDescriptor(args[0]);  
  25.                 ht.addFamily(new HColumnDescriptor("sample"));  
  26.                 ht.addFamily(new HColumnDescriptor("region"));  
  27.                 ht.addFamily(new HColumnDescriptor("time"));  
  28.                 ht.addFamily(new HColumnDescriptor("product"));  
  29.                 ht.addFamily(new HColumnDescriptor("sale"));  
  30.                 ht.addFamily(new HColumnDescriptor("profit"));  
  31.                 hba.createTable(ht);  
  32.                 System.out.println("New Table Created");  
  33.                   
  34.                 HTable table = new HTable(conf,args[0]);  
  35.               
  36.                 File f = new File("/home/training/Desktop/data");  
  37.                 BufferedReader br = new BufferedReader(new FileReader(f));  
  38.                 String line = br.readLine();  
  39.                 int i =1;  
  40.                 String rowname="row";  
  41.                 while(line!=null && line.length()!=0){  
  42.                     System.out.println("Ok till here");  
  43.                     StringTokenizer tokens = new StringTokenizer(line,",");  
  44.                     rowname = "row"+i;  
  45.                     Put p = new Put(Bytes.toBytes(rowname));  
  46.                     p.add(Bytes.toBytes("sample"),Bytes.toBytes("sampleNo."),  
  47. Bytes.toBytes(Integer.parseInt(tokens.nextToken())));  
  48.                     p.add(Bytes.toBytes("region"),Bytes.toBytes("country"),Bytes.toBytes(tokens.nextToken()));  
  49.                     p.add(Bytes.toBytes("region"),Bytes.toBytes("state"),Bytes.toBytes(tokens.nextToken()));  
  50.                     p.add(Bytes.toBytes("region"),Bytes.toBytes("city"),Bytes.toBytes(tokens.nextToken()));  
  51.                     p.add(Bytes.toBytes("time"),Bytes.toBytes("year"),Bytes.toBytes(Integer.parseInt(tokens.nextToken())));  
  52.                     p.add(Bytes.toBytes("time"),Bytes.toBytes("month"),Bytes.toBytes(tokens.nextToken()));  
  53.                     p.add(Bytes.toBytes("product"),Bytes.toBytes("productNo."),Bytes.toBytes(tokens.nextToken()));  
  54.                     p.add(Bytes.toBytes("sale"),Bytes.toBytes("quantity"),Bytes.toBytes(Integer.parseInt(tokens.nextToken())));  
  55.                     p.add(Bytes.toBytes("profit"),Bytes.toBytes("earnings"),Bytes.toBytes(tokens.nextToken()));  
  56.                     i++;  
  57.                     table.put(p);  
  58.                     line = br.readLine();  
  59.                 }  
  60.                     br.close();  
  61.                     table.close();  
  62.                 }  
  63.             else  
  64.                 System.out.println("Table Already exists.Please enter another table name");  
  65.         }  
  66.         else  
  67.             System.out.println("Please Enter the table name through command line");       
  68.     }  
  69. }  

No comments:

Post a Comment

Lab 09: Publish and subscribe to Event Grid events

  Microsoft Azure user interface Given the dynamic nature of Microsoft cloud tools, you might experience Azure UI changes that occur after t...