Delete HBase rows example
$hadoop jar ./sponge-hserver.jar com.citi.sponge.mapreduce.MRDeleteRows -Dtable="elf_log" -DstartKey="10000:1365663164575:88888:testhome" -DstopKey="10000:1365663164575:88890:testhome" -Dquorum="vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net"
$hadoop jar ./sponge-hserver.jar com.citi.sponge.mapreduce.MRDeleteRows -Dtable="elf_log" -Dappid="10000" -DstartTime="2010-01-01-01-01" -DstopTime="2014-01-01-01-01" -Dquorum="vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net"
import java.io.IOException; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Map.Entry; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; import org.apache.hadoop.hbase.mapreduce.TableMapper; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class MRDeleteRows extends Configured implements Tool { String startRowKey; String stopRowKey; String quorum; String table; String startTime; String stopTime; String appID; public String getStartTime() { return startTime; } public String getStopTime() { return stopTime; } public String getAppID() { return appID; } public String getQuorum() { return quorum; } public String getStartRowKey() { return startRowKey; } public String getStopRowKey() { return stopRowKey; } public String getTable() { return table; } @Override public int run(String[] arg0) throws Exception { Configuration conf = getConf(); for (Entry<String, String> entry : conf) { if (entry.getKey().equals("startKey")) { this.startRowKey = entry.getValue(); } if (entry.getKey().equals("stopKey")) { this.stopRowKey = entry.getValue(); } if (entry.getKey().equals("quorum")) { this.quorum = entry.getValue(); } if (entry.getKey().equals("table")) { this.table = entry.getValue(); } if (entry.getKey().equals("startTime")) { this.startTime = entry.getValue(); } if (entry.getKey().equals("stopTime")) { this.stopTime = entry.getValue(); } if (entry.getKey().equals("appid")) { this.appID = entry.getValue(); } } return 0; } static String getRowKey(String appID, String time){ DateFormat 
df = new SimpleDateFormat("yyyy-MM-dd-HH-mm"); Date date = null; try{ date = df.parse(time); }catch(ParseException e){ System.out.println("Please input correct date format"); System.exit(1); } return appID + ":" + date.getTime(); } static class DeleteMapper extends TableMapper<ImmutableBytesWritable, Delete> { public DeleteMapper() { } @Override public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException { ImmutableBytesWritable userKey = new ImmutableBytesWritable(row.get()); try{ Delete delete = new Delete(row.get()); context.write(userKey, delete); } catch (InterruptedException e){ e.printStackTrace(); throw new IOException(e); } } } public static void main(String[] args) throws Exception { MRDeleteRows deleteElf = new MRDeleteRows(); ToolRunner.run(deleteElf, args); Configuration config = HBaseConfiguration.create(); config.set("hbase.zookeeper.quorum", deleteElf.getQuorum()); Job job = new Job(config, "DeleteHbaseRowkeys"); job.setJarByClass(MRDeleteRows.class); Scan scan = new Scan(); System.out.println("quorum: " + deleteElf.getQuorum()); System.out.println("table: " + deleteElf.getTable()); if(deleteElf.getStartRowKey()!=null && deleteElf.getStopRowKey()!=null){ System.out.println("startkey: " + deleteElf.getStartRowKey()); System.out.println("stopkey: " + deleteElf.getStopRowKey()); scan.setStartRow(deleteElf.getStartRowKey().getBytes()); scan.setStopRow(deleteElf.getStopRowKey().getBytes()); } if(deleteElf.getAppID()!=null && deleteElf.getStartTime()!=null && deleteElf.getStopTime()!=null){ System.out.println("AppID: " + deleteElf.getAppID()); System.out.println("start time: " + deleteElf.getStartTime()); System.out.println("stop time: " + deleteElf.getStopTime()); scan.setStartRow(getRowKey(deleteElf.getAppID(),deleteElf.getStartTime()).getBytes()); scan.setStopRow(getRowKey(deleteElf.getAppID(),deleteElf.getStopTime()).getBytes()); } scan.setCacheBlocks(false); TableMapReduceUtil.initTableMapperJob(deleteElf.getTable(), 
scan, DeleteMapper.class, ImmutableBytesWritable.class, Delete.class, job); TableMapReduceUtil.initTableReducerJob(deleteElf.getTable(), null, job); boolean b = job.waitForCompletion(true); if (!b) { throw new IOException("error with job!"); } } }
HBase Loader MapReduce Example
import java.io.IOException; import java.util.Calendar; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; import org.apache.hadoop.hbase.mapreduce.TableReducer; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.util.GenericOptionsParser; /** * Sample Uploader MapReduce * <p> * This is EXAMPLE code. You will need to change it to work for your context. * <p> * Uses {@link TableReducer} to put the data into HBase. Change the InputFormat * to suit your data. In this example, we are importing a CSV file. * <p> * * <pre> * row,family,qualifier,value * </pre> * <p> * The table and columnfamily we're to insert into must preexist. * <p> * There is no reducer in this example as it is not necessary and adds * significant overhead. If you need to do any massaging of data before * inserting into HBase, you can do this in the map as well. * <p> * Do the following to start the MR job: * * <pre> * ./bin/hadoop org.apache.hadoop.hbase.mapreduce.SampleUploader /tmp/input.csv TABLE_NAME * </pre> * <p> * This code was written against HBase 0.21 trunk. * * Before running this job, please make sure set HADOOP_CLASSPATH. 
You need to include zookeeper.jar and hbase-0.90.4-cdh3u3.jar */ public class BulkLoaderToHbase { private static final String NAME = "BulkLoaderToHbase"; private static byte[] SYSINFO; private static byte[] CONTENT; private static byte[] APP_ID; private static byte[] ENV; private static byte[] HOSTNAME; private static byte[] BODY; private static byte[] LOG_FILE_NAME; private static byte[] LOG_TYPE; private static byte[] LOG_FILE_PATH; private static byte[] appId_v; private static byte[] env_v; private static byte[] hostname_v; private static byte[] logPath_v; private static byte[] logFileName_v; private static byte[] logType_v; private static long nano = 0; static class Uploader extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> { private long checkpoint = 100; private long count = 0; @Override public void map(LongWritable key, Text line, Context context) throws IOException { Calendar cal = Calendar.getInstance(); String rowkey = Bytes.toString(appId_v) + ":" + cal.getTimeInMillis() + ":" + (nano++) + ":" + Bytes.toString(hostname_v); byte[] rowKeyValue = Bytes.toBytes(rowkey); Put put = new Put(rowKeyValue); put.add(SYSINFO, APP_ID, appId_v); put.add(SYSINFO, ENV, env_v); put.add(SYSINFO, HOSTNAME, hostname_v); put.add(CONTENT, BODY, line.getBytes()); put.add(CONTENT, LOG_FILE_PATH, logPath_v); put.add(CONTENT, LOG_FILE_NAME, logFileName_v); put.add(CONTENT, LOG_TYPE, logType_v); // Uncomment below to disable WAL. This will improve performance but // means // you will experience data loss in the case of a RegionServer // crash. // put.setWriteToWAL(false); try { context.write(new ImmutableBytesWritable(rowKeyValue), put); } catch (InterruptedException e) { e.printStackTrace(); } // Set status every checkpoint lines if (++count % checkpoint == 0) { context.setStatus("Emitting Put " + count); } } } /** * Job configuration. 
*/ public static Job configureJob(Configuration conf, String[] args) throws IOException { SYSINFO = Bytes.toBytes("sysInfo"); CONTENT = Bytes.toBytes("content"); APP_ID = Bytes.toBytes("appId"); ENV = Bytes.toBytes("env"); HOSTNAME = Bytes.toBytes("hostName"); BODY = Bytes.toBytes("body"); LOG_FILE_PATH = Bytes.toBytes("logFilePath"); LOG_FILE_NAME = Bytes.toBytes("logFileName"); LOG_TYPE = Bytes.toBytes("logType"); Path inputPath = new Path(args[0]); String tableName = args[1]; appId_v = Bytes.toBytes(args[2]); env_v = Bytes.toBytes(args[3]); hostname_v = Bytes.toBytes(args[4]); logPath_v = Bytes.toBytes(args[5]); logFileName_v = Bytes.toBytes(args[6]); logType_v = Bytes.toBytes(args[7]); Job job = new Job(conf, NAME + "_" + tableName); job.setJarByClass(Uploader.class); FileInputFormat.setInputPaths(job, inputPath); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(Uploader.class); // No reducers. Just write straight to table. Call initTableReducerJob // because it sets up the TableOutputFormat. TableMapReduceUtil.initTableReducerJob(tableName, null, job); job.setNumReduceTasks(0); return job; } /** * Main entry point. * * @param args * The command line parameters. * @throws Exception * When running the job fails. */ public static void main(String[] args) throws Exception { Configuration conf = HBaseConfiguration.create(); String[] otherArgs = new GenericOptionsParser(conf, args) .getRemainingArgs(); if (otherArgs.length != 8) { System.err .println("Wrong number of arguments: " + otherArgs.length); System.err.println("Usage: " + NAME + " <input> <tablename> <appId> <env> <hostname> <logpath> <logFileName> <logType>"); System.exit(-1); } Job job = configureJob(conf, otherArgs); System.exit(job.waitForCompletion(true) ? 0 : 1); } }
相关推荐
HBase MapReduce完整实例.rar
Eclipse工程 HBase MapReduce完整实例 可远程执行 包含HBase增删改查 执行Test可看到效果
基于hadoop的简单网络爬虫,HBase MapReduce
HBase的 HBase MapReduce投影
mapreduce方式入库hbase hive hdfs,速度很快,里面详细讲述了代码的编写过程,值得下载
该案例中主要使用MapReduce作为处理组件进行数据处理,实现的案例有如通过javaapi实现hbase数据写入hdfs、hbase表数据复制到另一个表中等操作 对应(《HBase分布式存储系统应用》胡鑫喆 张志刚著)教材中案例
hadoop1.1.2操作例子 包括hbase hive mapreduce相应的jar包
│ Day16[Hbase 企业应用及与MapReduce集成].pdf ├─02_视频 │ Day1601_Hbase Java API-环境配置.mp4 │ Day1603_Hbase Java API-put、delete.mp4 │ Day1604_Hbase Java API-Scan和过滤器.mp4 │ Day1605_Hbase...
对Hadoop中的HDFS、MapReduce、Hbase系列知识的介绍。如果想初略了解Hadoop 可下载观看
HDFS+MapReduce+Hive+HBase十分钟快速入门.pdf
基于Hadoop的mapreduce 在hbase上的使用,基于Hadoop的mapreduce 在hbase上的使用
htabse 命令的基本操作步骤,1.熟悉使用HBase操作常用的Shell命令。 2.学会表和族的属性操作。 3.学会Filter操作。 4.学会时间戳和数据版本的操作。 学会数据批量导入。
NULL 博文链接:https://jsh0401.iteye.com/blog/2096103
#资源达人分享计划#
该文件为hbase hbck2 jar;适用于hbase 2.x维护,hbase 1.x不适用;对于HBase跨集群HD集群迁移,当HDFS文件迁移后,使用HBCK客户端,完成HBase元数据修复。当前版本:hbase-hbck2-1.3.0.jarhbase hbck -j /opt/...
Hadoop实战中文版Hadoop HBase MapReduce Hive Pig
hadoop权威指南中文版 云计算 java hive hbase mapreduce hdfs
spring 集成 Hbase所需jar包
Hadoop-0.20.0-HDFS+MapReduce+Hive+HBase十分钟快速入门
7.3. Accessing Other HBase Tables in a MapReduce Job 7.4. Speculative Execution 8. HBase安全 8.1. 安全客户端访问 HBase 8.2. 访问控制 9. 架构 9.1. 概述 9.2. Catalog Tables 9.3. 客户端 9.4. Client ...