001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapreduce; 019 020import java.io.IOException; 021import java.text.ParseException; 022import java.text.SimpleDateFormat; 023import java.util.ArrayList; 024import java.util.Collections; 025import java.util.HashSet; 026import java.util.List; 027import java.util.Map; 028import java.util.Set; 029import java.util.TreeMap; 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.conf.Configured; 032import org.apache.hadoop.fs.Path; 033import org.apache.hadoop.hbase.Cell; 034import org.apache.hadoop.hbase.CellUtil; 035import org.apache.hadoop.hbase.HBaseConfiguration; 036import org.apache.hadoop.hbase.KeyValue; 037import org.apache.hadoop.hbase.KeyValueUtil; 038import org.apache.hadoop.hbase.PrivateCellUtil; 039import org.apache.hadoop.hbase.TableName; 040import org.apache.hadoop.hbase.client.Connection; 041import org.apache.hadoop.hbase.client.ConnectionFactory; 042import org.apache.hadoop.hbase.client.Delete; 043import org.apache.hadoop.hbase.client.Mutation; 044import org.apache.hadoop.hbase.client.Put; 045import org.apache.hadoop.hbase.client.RegionLocator; 046import org.apache.hadoop.hbase.client.Table; 047import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 048import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.TableInfo; 049import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec; 050import org.apache.hadoop.hbase.snapshot.SnapshotRegionLocator; 051import org.apache.hadoop.hbase.util.Bytes; 052import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 053import org.apache.hadoop.hbase.util.MapReduceExtendedCell; 054import org.apache.hadoop.hbase.wal.WALEdit; 055import org.apache.hadoop.hbase.wal.WALKey; 056import org.apache.hadoop.mapreduce.Job; 057import org.apache.hadoop.mapreduce.Mapper; 058import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 059import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 060import org.apache.hadoop.util.Tool; 061import org.apache.hadoop.util.ToolRunner; 062import org.apache.yetus.audience.InterfaceAudience; 063import org.slf4j.Logger; 064import org.slf4j.LoggerFactory; 065 066/** 067 * A tool to replay WAL files as a M/R job. The WAL can be replayed for a set of tables or all 068 * tables, and a time range can be provided (in milliseconds). The WAL is filtered to the passed set 069 * of tables and the output can optionally be mapped to another set of tables. WAL replay can also 070 * generate HFiles for later bulk importing, in that case the WAL is replayed for a single table 071 * only. 072 */ 073@InterfaceAudience.Public 074public class WALPlayer extends Configured implements Tool { 075 private static final Logger LOG = LoggerFactory.getLogger(WALPlayer.class); 076 final static String NAME = "WALPlayer"; 077 public final static String BULK_OUTPUT_CONF_KEY = "wal.bulk.output"; 078 public final static String TABLES_KEY = "wal.input.tables"; 079 public final static String TABLE_MAP_KEY = "wal.input.tablesmap"; 080 public final static String INPUT_FILES_SEPARATOR_KEY = "wal.input.separator"; 081 public final static String IGNORE_MISSING_FILES = "wal.input.ignore.missing.files"; 082 public final static String MULTI_TABLES_SUPPORT = "wal.multi.tables.support"; 083 084 protected static final String tableSeparator = ";"; 085 086 private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name"; 087 088 public WALPlayer() { 089 } 090 091 protected WALPlayer(final Configuration c) { 092 super(c); 093 } 094 095 /** 096 * A mapper that just writes out KeyValues. This one can be used together with 097 * {@link KeyValueSortReducer} 098 * @deprecated Use {@link WALCellMapper}. Will be removed from 3.0 onwards 099 */ 100 @Deprecated 101 static class WALKeyValueMapper extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue> { 102 private Set<String> tableSet = new HashSet<String>(); 103 private boolean multiTableSupport = false; 104 105 @Override 106 public void map(WALKey key, WALEdit value, Context context) throws IOException { 107 try { 108 TableName table = key.getTableName(); 109 if (tableSet.contains(table.getNameAsString())) { 110 for (Cell cell : value.getCells()) { 111 if (WALEdit.isMetaEditFamily(cell)) { 112 continue; 113 } 114 KeyValue keyValue = KeyValueUtil.ensureKeyValue(cell); 115 byte[] outKey = multiTableSupport 116 ? Bytes.add(table.getName(), Bytes.toBytes(tableSeparator), 117 CellUtil.cloneRow(keyValue)) 118 : CellUtil.cloneRow(keyValue); 119 context.write(new ImmutableBytesWritable(outKey), keyValue); 120 } 121 } 122 } catch (InterruptedException e) { 123 e.printStackTrace(); 124 } 125 } 126 127 @Override 128 public void setup(Context context) throws IOException { 129 Configuration conf = context.getConfiguration(); 130 String[] tables = conf.getStrings(TABLES_KEY); 131 this.multiTableSupport = conf.getBoolean(MULTI_TABLES_SUPPORT, false); 132 for (String table : tables) { 133 tableSet.add(table); 134 } 135 } 136 } 137 138 /** 139 * A mapper that just writes out Cells. This one can be used together with {@link CellSortReducer} 140 */ 141 static class WALCellMapper extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Cell> { 142 private Set<String> tableSet = new HashSet<>(); 143 private boolean multiTableSupport = false; 144 145 @Override 146 public void map(WALKey key, WALEdit value, Context context) throws IOException { 147 try { 148 TableName table = key.getTableName(); 149 if (tableSet.contains(table.getNameAsString())) { 150 for (Cell cell : value.getCells()) { 151 if (WALEdit.isMetaEditFamily(cell)) { 152 continue; 153 } 154 155 // Set sequenceId from WALKey, since it is not included by WALCellCodec. The sequenceId 156 // on WALKey is the same value that was on the cells in the WALEdit. This enables 157 // CellSortReducer to use sequenceId to disambiguate duplicate cell timestamps. 158 // See HBASE-27649 159 PrivateCellUtil.setSequenceId(cell, key.getSequenceId()); 160 161 byte[] outKey = multiTableSupport 162 ? Bytes.add(table.getName(), Bytes.toBytes(tableSeparator), CellUtil.cloneRow(cell)) 163 : CellUtil.cloneRow(cell); 164 context.write(new ImmutableBytesWritable(outKey), new MapReduceExtendedCell(cell)); 165 } 166 } 167 } catch (InterruptedException e) { 168 e.printStackTrace(); 169 } 170 } 171 172 @Override 173 public void setup(Context context) throws IOException { 174 Configuration conf = context.getConfiguration(); 175 String[] tables = conf.getStrings(TABLES_KEY); 176 this.multiTableSupport = conf.getBoolean(MULTI_TABLES_SUPPORT, false); 177 Collections.addAll(tableSet, tables); 178 } 179 } 180 181 /** 182 * Enum for map metrics. Keep it out here rather than inside in the Map inner-class so we can find 183 * associated properties. 184 */ 185 protected static enum Counter { 186 /** Number of aggregated writes */ 187 PUTS, 188 /** Number of aggregated deletes */ 189 DELETES, 190 CELLS_READ, 191 CELLS_WRITTEN, 192 WALEDITS 193 } 194 195 /** 196 * A mapper that writes out {@link Mutation} to be directly applied to a running HBase instance. 197 */ 198 protected static class WALMapper 199 extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation> { 200 private Map<TableName, TableName> tables = new TreeMap<>(); 201 202 @Override 203 public void map(WALKey key, WALEdit value, Context context) throws IOException { 204 context.getCounter(Counter.WALEDITS).increment(1); 205 try { 206 if (tables.isEmpty() || tables.containsKey(key.getTableName())) { 207 TableName targetTable = 208 tables.isEmpty() ? key.getTableName() : tables.get(key.getTableName()); 209 ImmutableBytesWritable tableOut = new ImmutableBytesWritable(targetTable.getName()); 210 Put put = null; 211 Delete del = null; 212 Cell lastCell = null; 213 for (Cell cell : value.getCells()) { 214 context.getCounter(Counter.CELLS_READ).increment(1); 215 // Filtering WAL meta marker entries. 216 if (WALEdit.isMetaEditFamily(cell)) { 217 continue; 218 } 219 // Allow a subclass filter out this cell. 220 if (filter(context, cell)) { 221 // A WALEdit may contain multiple operations (HBASE-3584) and/or 222 // multiple rows (HBASE-5229). 223 // Aggregate as much as possible into a single Put/Delete 224 // operation before writing to the context. 225 if ( 226 lastCell == null || lastCell.getTypeByte() != cell.getTypeByte() 227 || !CellUtil.matchingRows(lastCell, cell) 228 ) { 229 // row or type changed, write out aggregate KVs. 230 if (put != null) { 231 context.write(tableOut, put); 232 context.getCounter(Counter.PUTS).increment(1); 233 } 234 if (del != null) { 235 context.write(tableOut, del); 236 context.getCounter(Counter.DELETES).increment(1); 237 } 238 if (CellUtil.isDelete(cell)) { 239 del = new Delete(CellUtil.cloneRow(cell)); 240 } else { 241 put = new Put(CellUtil.cloneRow(cell)); 242 } 243 } 244 if (CellUtil.isDelete(cell)) { 245 del.add(cell); 246 } else { 247 put.add(cell); 248 } 249 context.getCounter(Counter.CELLS_WRITTEN).increment(1); 250 } 251 lastCell = cell; 252 } 253 // write residual KVs 254 if (put != null) { 255 context.write(tableOut, put); 256 context.getCounter(Counter.PUTS).increment(1); 257 } 258 if (del != null) { 259 context.getCounter(Counter.DELETES).increment(1); 260 context.write(tableOut, del); 261 } 262 } 263 } catch (InterruptedException e) { 264 e.printStackTrace(); 265 } 266 } 267 268 protected boolean filter(Context context, final Cell cell) { 269 return true; 270 } 271 272 @Override 273 protected void 274 cleanup(Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation>.Context context) 275 throws IOException, InterruptedException { 276 super.cleanup(context); 277 } 278 279 @SuppressWarnings("checkstyle:EmptyBlock") 280 @Override 281 public void setup(Context context) throws IOException { 282 String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY); 283 String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY); 284 if (tableMap == null) { 285 tableMap = tablesToUse; 286 } 287 if (tablesToUse == null) { 288 // Then user wants all tables. 289 } else if (tablesToUse.length != tableMap.length) { 290 // this can only happen when WALMapper is used directly by a class other than WALPlayer 291 throw new IOException("Incorrect table mapping specified ."); 292 } 293 int i = 0; 294 if (tablesToUse != null) { 295 for (String table : tablesToUse) { 296 tables.put(TableName.valueOf(table), TableName.valueOf(tableMap[i++])); 297 } 298 } 299 } 300 } 301 302 void setupTime(Configuration conf, String option) throws IOException { 303 String val = conf.get(option); 304 if (null == val) { 305 return; 306 } 307 long ms; 308 try { 309 // first try to parse in user friendly form 310 ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime(); 311 } catch (ParseException pe) { 312 try { 313 // then see if just a number of ms's was specified 314 ms = Long.parseLong(val); 315 } catch (NumberFormatException nfe) { 316 throw new IOException( 317 option + " must be specified either in the form 2001-02-20T16:35:06.99 " 318 + "or as number of milliseconds"); 319 } 320 } 321 conf.setLong(option, ms); 322 } 323 324 /** 325 * Sets up the actual job. 326 * @param args The command line parameters. 327 * @return The newly created job. 328 * @throws IOException When setting up the job fails. 329 */ 330 public Job createSubmittableJob(String[] args) throws IOException { 331 Configuration conf = getConf(); 332 setupTime(conf, WALInputFormat.START_TIME_KEY); 333 setupTime(conf, WALInputFormat.END_TIME_KEY); 334 String inputDirs = args[0]; 335 String[] tables = args.length == 1 ? new String[] {} : args[1].split(","); 336 String[] tableMap; 337 if (args.length > 2) { 338 tableMap = args[2].split(","); 339 if (tableMap.length != tables.length) { 340 throw new IOException("The same number of tables and mapping must be provided."); 341 } 342 } else { 343 // if no mapping is specified, map each table to itself 344 tableMap = tables; 345 } 346 347 boolean multiTableSupport = conf.getBoolean(MULTI_TABLES_SUPPORT, false); 348 conf.setStrings(TABLES_KEY, tables); 349 conf.setStrings(TABLE_MAP_KEY, tableMap); 350 conf.set(FileInputFormat.INPUT_DIR, inputDirs); 351 Job job = Job.getInstance(conf, 352 conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime())); 353 job.setJarByClass(WALPlayer.class); 354 355 job.setInputFormatClass(WALInputFormat.class); 356 job.setMapOutputKeyClass(ImmutableBytesWritable.class); 357 358 String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); 359 if (hfileOutPath != null) { 360 LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs); 361 362 if (!multiTableSupport && tables.length != 1) { 363 throw new IOException("Exactly one table must be specified for the bulk export option"); 364 } 365 366 // WALPlayer needs ExtendedCellSerialization so that sequenceId can be propagated when 367 // sorting cells in CellSortReducer 368 job.getConfiguration().setBoolean(HFileOutputFormat2.EXTENDED_CELL_SERIALIZATION_ENABLED_KEY, 369 true); 370 371 // the bulk HFile case 372 List<TableName> tableNames = getTableNameList(tables); 373 374 job.setMapperClass(WALCellMapper.class); 375 job.setReducerClass(CellSortReducer.class); 376 Path outputDir = new Path(hfileOutPath); 377 FileOutputFormat.setOutputPath(job, outputDir); 378 job.setMapOutputValueClass(MapReduceExtendedCell.class); 379 try (Connection conn = ConnectionFactory.createConnection(conf);) { 380 List<TableInfo> tableInfoList = new ArrayList<>(); 381 for (TableName tableName : tableNames) { 382 Table table = conn.getTable(tableName); 383 RegionLocator regionLocator = getRegionLocator(tableName, conf, conn); 384 tableInfoList.add(new TableInfo(table.getDescriptor(), regionLocator)); 385 } 386 if (multiTableSupport) { 387 MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfoList); 388 } else { 389 TableInfo tableInfo = tableInfoList.get(0); 390 HFileOutputFormat2.configureIncrementalLoad(job, tableInfo.getTableDescriptor(), 391 tableInfo.getRegionLocator()); 392 } 393 } 394 TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), 395 org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class); 396 } else { 397 // output to live cluster 398 job.setMapperClass(WALMapper.class); 399 job.setOutputFormatClass(MultiTableOutputFormat.class); 400 TableMapReduceUtil.addDependencyJars(job); 401 TableMapReduceUtil.initCredentials(job); 402 // No reducers. 403 job.setNumReduceTasks(0); 404 } 405 String codecCls = WALCellCodec.getWALCellCodecClass(conf).getName(); 406 try { 407 TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), 408 Class.forName(codecCls)); 409 } catch (Exception e) { 410 throw new IOException("Cannot determine wal codec class " + codecCls, e); 411 } 412 return job; 413 } 414 415 private List<TableName> getTableNameList(String[] tables) { 416 List<TableName> list = new ArrayList<TableName>(); 417 for (String name : tables) { 418 list.add(TableName.valueOf(name)); 419 } 420 return list; 421 } 422 423 /** 424 * Print usage 425 * @param errorMsg Error message. Can be null. 426 */ 427 private void usage(final String errorMsg) { 428 if (errorMsg != null && errorMsg.length() > 0) { 429 System.err.println("ERROR: " + errorMsg); 430 } 431 System.err.println("Usage: " + NAME + " [options] <WAL inputdir> [<tables> <tableMappings>]"); 432 System.err.println(" <WAL inputdir> directory of WALs to replay."); 433 System.err.println(" <tables> comma separated list of tables. If no tables specified,"); 434 System.err.println(" all are imported (even hbase:meta if present)."); 435 System.err.println( 436 " <tableMappings> WAL entries can be mapped to a new set of tables by " + "passing"); 437 System.err 438 .println(" <tableMappings>, a comma separated list of target " + "tables."); 439 System.err 440 .println(" If specified, each table in <tables> must have a " + "mapping."); 441 System.err.println("To generate HFiles to bulk load instead of loading HBase directly, pass:"); 442 System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output"); 443 System.err.println(" Only one table can be specified, and no mapping allowed!"); 444 System.err.println("To specify a time range, pass:"); 445 System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]"); 446 System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]"); 447 System.err.println(" The start and the end date of timerange (inclusive). The dates can be"); 448 System.err 449 .println(" expressed in milliseconds-since-epoch or yyyy-MM-dd'T'HH:mm:ss.SS " + "format."); 450 System.err.println(" E.g. 1234567890120 or 2009-02-13T23:32:30.12"); 451 System.err.println("Other options:"); 452 System.err.println(" -D" + JOB_NAME_CONF_KEY + "=jobName"); 453 System.err.println(" Use the specified mapreduce job name for the wal player"); 454 System.err.println(" -Dwal.input.separator=' '"); 455 System.err.println(" Change WAL filename separator (WAL dir names use default ','.)"); 456 System.err.println("For performance also consider the following options:\n" 457 + " -Dmapreduce.map.speculative=false\n" + " -Dmapreduce.reduce.speculative=false"); 458 } 459 460 /** 461 * Main entry point. 462 * @param args The command line parameters. 463 * @throws Exception When running the job fails. 464 */ 465 public static void main(String[] args) throws Exception { 466 int ret = ToolRunner.run(new WALPlayer(HBaseConfiguration.create()), args); 467 System.exit(ret); 468 } 469 470 @Override 471 public int run(String[] args) throws Exception { 472 if (args.length < 1) { 473 usage("Wrong number of arguments: " + args.length); 474 System.exit(-1); 475 } 476 Job job = createSubmittableJob(args); 477 return job.waitForCompletion(true) ? 0 : 1; 478 } 479 480 private static RegionLocator getRegionLocator(TableName tableName, Configuration conf, 481 Connection conn) throws IOException { 482 if (SnapshotRegionLocator.shouldUseSnapshotRegionLocator(conf, tableName)) { 483 return SnapshotRegionLocator.create(conf, tableName); 484 } 485 486 return conn.getRegionLocator(tableName); 487 } 488}