Let's look at an HBase example that imports the data in a file into an HBase table.
Use Case
We have to import the data present in a file into an HBase table that is created through the Java API.
Data_file.txt contains the below data
- 1,India,Bihar,Champaran,2009,April,P1,1,5
- 2,India, Bihar,Patna,2009,May,P1,2,10
- 3,India, Bihar,Bhagalpur,2010,June,P2,3,15
- 4,United States,California,Fresno,2009,April,P2,2,5
- 5,United States,California,Long Beach,2010,July,P2,4,10
- 6,United States,California,San Francisco,2011,August,P1,6,20
The Java code is shown below
This data has to be loaded into a new HBase table that is created through the Java API. The following column families have to be created:
- "sample, region, time, product, sale, profit".
Column family region has three column qualifiers: country, state, city
Column family time has two column qualifiers: year, month
Jar Files
Make sure that the following jars are on the classpath while writing the code, as they are required by HBase.
- commons-logging-1.0.4
- commons-logging-api-1.0.4
- hadoop-core-0.20.2-cdh3u2
- hbase-0.90.4-cdh3u2
- log4j-1.2.15
- zookeeper-3.3.3-cdh3u0
Program Code
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileReader;
- import java.io.IOException;
- import java.util.StringTokenizer;
-
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.hbase.HBaseConfiguration;
- import org.apache.hadoop.hbase.HColumnDescriptor;
- import org.apache.hadoop.hbase.HTableDescriptor;
- import org.apache.hadoop.hbase.client.HBaseAdmin;
- import org.apache.hadoop.hbase.client.HTable;
- import org.apache.hadoop.hbase.client.Put;
- import org.apache.hadoop.hbase.util.Bytes;
-
-
- public class readFromFile {
- public static void main(String[] args) throws IOException{
- if(args.length==1)
- {
- Configuration conf = HBaseConfiguration.create(new Configuration());
- HBaseAdmin hba = new HBaseAdmin(conf);
- if(!hba.tableExists(args[0])){
- HTableDescriptor ht = new HTableDescriptor(args[0]);
- ht.addFamily(new HColumnDescriptor("sample"));
- ht.addFamily(new HColumnDescriptor("region"));
- ht.addFamily(new HColumnDescriptor("time"));
- ht.addFamily(new HColumnDescriptor("product"));
- ht.addFamily(new HColumnDescriptor("sale"));
- ht.addFamily(new HColumnDescriptor("profit"));
- hba.createTable(ht);
- System.out.println("New Table Created");
-
- HTable table = new HTable(conf,args[0]);
-
- File f = new File("/home/training/Desktop/data");
- BufferedReader br = new BufferedReader(new FileReader(f));
- String line = br.readLine();
- int i =1;
- String rowname="row";
- while(line!=null && line.length()!=0){
- System.out.println("Ok till here");
- StringTokenizer tokens = new StringTokenizer(line,",");
- rowname = "row"+i;
- Put p = new Put(Bytes.toBytes(rowname));
- p.add(Bytes.toBytes("sample"),Bytes.toBytes("sampleNo."),
- Bytes.toBytes(Integer.parseInt(tokens.nextToken())));
- p.add(Bytes.toBytes("region"),Bytes.toBytes("country"),Bytes.toBytes(tokens.nextToken()));
- p.add(Bytes.toBytes("region"),Bytes.toBytes("state"),Bytes.toBytes(tokens.nextToken()));
- p.add(Bytes.toBytes("region"),Bytes.toBytes("city"),Bytes.toBytes(tokens.nextToken()));
- p.add(Bytes.toBytes("time"),Bytes.toBytes("year"),Bytes.toBytes(Integer.parseInt(tokens.nextToken())));
- p.add(Bytes.toBytes("time"),Bytes.toBytes("month"),Bytes.toBytes(tokens.nextToken()));
- p.add(Bytes.toBytes("product"),Bytes.toBytes("productNo."),Bytes.toBytes(tokens.nextToken()));
- p.add(Bytes.toBytes("sale"),Bytes.toBytes("quantity"),Bytes.toBytes(Integer.parseInt(tokens.nextToken())));
- p.add(Bytes.toBytes("profit"),Bytes.toBytes("earnings"),Bytes.toBytes(tokens.nextToken()));
- i++;
- table.put(p);
- line = br.readLine();
- }
- br.close();
- table.close();
- }
- else
- System.out.println("Table Already exists.Please enter another table name");
- }
- else
- System.out.println("Please Enter the table name through command line");
- }
- }
No comments:
Post a Comment