Hbase统计海量数据行数
业务需要统计每天hbase的数据量,而且每天增量有上百万条。
网上找了些代码,很多推荐使用Coprocessor的方式,执行效率高。但在我的大数据环境 运行出错,报“No registered coprocessor service found for name AggregateService in region xxx”。后来发现是第一次运行时需要下面这些代码来修改配置
String coprocessorClass = "org.apache.hadoop.hbase.coprocessor.AggregateImplementation";
if (! descriptor.hasCoprocessor(coprocessorClass)) {
descriptor.addCoprocessor(coprocessorClass);
}
admin.modifyTable(name, descriptor);
admin.enableTable(name);
public void rowCount(String tablename){
try {
//提前创建connection和conf
Admin admin = connection.getAdmin();
TableName name=TableName.valueOf(tablename);
//先disable表,添加协处理器后再enable表
admin.disableTable(name);
HTableDescriptor descriptor = admin.getTableDescriptor(name);
String coprocessorClass = "org.apache.hadoop.hbase.coprocessor.AggregateImplementation";
if (! descriptor.hasCoprocessor(coprocessorClass)) {
descriptor.addCoprocessor(coprocessorClass);
}
admin.modifyTable(name, descriptor);
admin.enableTable(name);
//计时
StopWatch stopWatch = new StopWatch();
stopWatch.start();
Scan scan = new Scan();
AggregationClient aggregationClient = new AggregationClient(conf);
System.out.println("RowCount: " + aggregationClient.rowCount(name, new LongColumnInterpreter(), scan));
stopWatch.stop();
System.out.println("统计耗时:" +stopWatch.getTotalTimeMillis());
} catch (Throwable e) {
e.printStackTrace();
}
}