001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.net.util; 019 020import java.math.BigInteger; 021import java.nio.charset.StandardCharsets; 022 023 024/** 025 * Provides Base64 encoding and decoding as defined by RFC 2045. 026 * 027 * <p> 028 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose 029 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein. 030 * </p> 031 * <p> 032 * The class can be parameterized in the following manner with various constructors: 033 * <ul> 034 * <li>URL-safe mode: Default off.</li> 035 * <li>Line length: Default 76. Line length that aren't multiples of 4 will still essentially end up being multiples of 036 * 4 in the encoded data. 037 * <li>Line separator: Default is CRLF ("\r\n")</li> 038 * </ul> 039 * <p> 040 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode 041 * character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc). 042 * </p> 043 * 044 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> 045 * @since 2.2 046 */ 047public class Base64 { 048 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; 049 050 private static final int DEFAULT_BUFFER_SIZE = 8192; 051 052 /** 053 * Chunk size per RFC 2045 section 6.8. 054 * 055 * <p> 056 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 057 * equal signs. 058 * </p> 059 * 060 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> 061 */ 062 static final int CHUNK_SIZE = 76; 063 064 /** 065 * Chunk separator per RFC 2045 section 2.1. 066 * 067 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> 068 */ 069 private static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; 070 071 /** 072 * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet" 073 * equivalents as specified in Table 1 of RFC 2045. 074 * 075 * Thanks to "commons" project in ws.apache.org for this code. 076 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 077 */ 078 private static final byte[] STANDARD_ENCODE_TABLE = { 079 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 080 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 081 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 082 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 083 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' 084 }; 085 086 /** 087 * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and / 088 * changed to - and _ to make the encoded Base64 results more URL-SAFE. 089 * This table is only used when the Base64's mode is set to URL-SAFE. 090 */ 091 private static final byte[] URL_SAFE_ENCODE_TABLE = { 092 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 093 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 094 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 095 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 096 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' 097 }; 098 099 /** 100 * Byte used to pad output. 101 */ 102 private static final byte PAD = '='; 103 104 /** 105 * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified in 106 * Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64 107 * alphabet but fall within the bounds of the array are translated to -1. 108 * 109 * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both 110 * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit). 111 * 112 * Thanks to "commons" project in ws.apache.org for this code. 113 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 114 */ 115 private static final byte[] DECODE_TABLE = { 116 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 117 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 118 -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, 52, 53, 54, 119 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 120 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 121 24, 25, -1, -1, -1, -1, 63, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 122 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 123 }; 124 125 /** Mask used to extract 6 bits, used when encoding */ 126 private static final int MASK_6BITS = 0x3f; 127 128 /** Mask used to extract 8 bits, used in decoding base64 bytes */ 129 private static final int MASK_8BITS = 0xff; 130 131 // The static final fields above are used for the original static byte[] methods on Base64. 132 // The private member fields below are used with the new streaming approach, which requires 133 // some state be preserved between calls of encode() and decode(). 134 135 /** 136 * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able 137 * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch 138 * between the two modes. 139 */ 140 private final byte[] encodeTable; 141 142 /** 143 * Line length for encoding. Not used when decoding. A value of zero or less implies no chunking of the base64 144 * encoded data. 145 */ 146 private final int lineLength; 147 148 /** 149 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. 150 */ 151 private final byte[] lineSeparator; 152 153 /** 154 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 155 * <code>decodeSize = 3 + lineSeparator.length;</code> 156 */ 157 private final int decodeSize; 158 159 /** 160 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 161 * <code>encodeSize = 4 + lineSeparator.length;</code> 162 */ 163 private final int encodeSize; 164 165 /** 166 * Buffer for streaming. 167 */ 168 private byte[] buffer; 169 170 /** 171 * Position where next character should be written in the buffer. 172 */ 173 private int pos; 174 175 /** 176 * Position where next character should be read from the buffer. 177 */ 178 private int readPos; 179 180 /** 181 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use it to 182 * make sure each encoded line never goes beyond lineLength (if lineLength > 0). 183 */ 184 private int currentLinePos; 185 186 /** 187 * Writes to the buffer only occur after every 3 reads when encoding, an every 4 reads when decoding. This variable 188 * helps track that. 189 */ 190 private int modulus; 191 192 /** 193 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this Base64 object becomes useless, 194 * and must be thrown away. 195 */ 196 private boolean eof; 197 198 /** 199 * Place holder for the 3 bytes we're dealing with for our base64 logic. Bitwise operations store and extract the 200 * base64 encoding or decoding from this variable. 201 */ 202 private int x; 203 204 /** 205 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 206 * <p> 207 * When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE. 208 * </p> 209 * 210 * <p> 211 * When decoding all variants are supported. 212 * </p> 213 */ 214 public Base64() { 215 this(false); 216 } 217 218 /** 219 * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode. 220 * <p> 221 * When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE. 222 * </p> 223 * 224 * <p> 225 * When decoding all variants are supported. 226 * </p> 227 * 228 * @param urlSafe 229 * if <code>true</code>, URL-safe encoding is used. In most cases this should be set to 230 * <code>false</code>. 231 * @since 1.4 232 */ 233 public Base64(final boolean urlSafe) { 234 this(CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe); 235 } 236 237 /** 238 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 239 * <p> 240 * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is 241 * STANDARD_ENCODE_TABLE. 242 * </p> 243 * <p> 244 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. 245 * </p> 246 * <p> 247 * When decoding all variants are supported. 248 * </p> 249 * 250 * @param lineLength 251 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4). 252 * If {@code lineLength <= 0}, then the output will not be divided into lines (chunks). Ignored when decoding. 253 * @since 1.4 254 */ 255 public Base64(final int lineLength) { 256 this(lineLength, CHUNK_SEPARATOR); 257 } 258 259 /** 260 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 261 * <p> 262 * When encoding the line length and line separator are given in the constructor, and the encoding table is 263 * STANDARD_ENCODE_TABLE. 264 * </p> 265 * <p> 266 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. 267 * </p> 268 * <p> 269 * When decoding all variants are supported. 270 * </p> 271 * 272 * @param lineLength 273 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4). 274 * If {@code lineLength <= 0}, then the output will not be divided into lines (chunks). Ignored when decoding. 275 * @param lineSeparator 276 * Each line of encoded data will end with this sequence of bytes. 277 * @throws IllegalArgumentException 278 * Thrown when the provided lineSeparator included some base64 characters. 279 * @since 1.4 280 */ 281 public Base64(final int lineLength, final byte[] lineSeparator) { 282 this(lineLength, lineSeparator, false); 283 } 284 285 /** 286 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 287 * <p> 288 * When encoding the line length and line separator are given in the constructor, and the encoding table is 289 * STANDARD_ENCODE_TABLE. 290 * </p> 291 * <p> 292 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. 293 * </p> 294 * <p> 295 * When decoding all variants are supported. 296 * </p> 297 * 298 * @param lineLength 299 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4). 300 * If {@code lineLength <= 0}, then the output will not be divided into lines (chunks). Ignored when decoding. 301 * @param lineSeparator 302 * Each line of encoded data will end with this sequence of bytes. 303 * @param urlSafe 304 * Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode 305 * operations. Decoding seamlessly handles both modes. 306 * @throws IllegalArgumentException 307 * The provided lineSeparator included some base64 characters. That's not going to work! 308 * @since 1.4 309 */ 310 public Base64(int lineLength, byte[] lineSeparator, final boolean urlSafe) { 311 if (lineSeparator == null) { 312 lineLength = 0; // disable chunk-separating 313 lineSeparator = NetConstants.EMPTY_BTYE_ARRAY; // this just gets ignored 314 } 315 this.lineLength = lineLength > 0 ? (lineLength / 4) * 4 : 0; 316 this.lineSeparator = new byte[lineSeparator.length]; 317 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length); 318 if (lineLength > 0) { 319 this.encodeSize = 4 + lineSeparator.length; 320 } else { 321 this.encodeSize = 4; 322 } 323 this.decodeSize = this.encodeSize - 1; 324 if (containsBase64Byte(lineSeparator)) { 325 final String sep = newStringUtf8(lineSeparator); 326 throw new IllegalArgumentException("lineSeperator must not contain base64 characters: [" + sep + "]"); 327 } 328 this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE; 329 } 330 331 /** 332 * Returns our current encode mode. True if we're URL-SAFE, false otherwise. 333 * 334 * @return true if we're in URL-SAFE mode, false otherwise. 335 * @since 1.4 336 */ 337 public boolean isUrlSafe() { 338 return this.encodeTable == URL_SAFE_ENCODE_TABLE; 339 } 340 341 /** 342 * Returns true if this Base64 object has buffered data for reading. 343 * 344 * @return true if there is Base64 object still available for reading. 345 */ 346 boolean hasData() { 347 return this.buffer != null; 348 } 349 350 /** 351 * Returns the amount of buffered data available for reading. 352 * 353 * @return The amount of buffered data available for reading. 354 */ 355 int avail() { 356 return buffer != null ? pos - readPos : 0; 357 } 358 359 /** Doubles our buffer. */ 360 private void resizeBuffer() { 361 if (buffer == null) { 362 buffer = new byte[DEFAULT_BUFFER_SIZE]; 363 pos = 0; 364 readPos = 0; 365 } else { 366 final byte[] b = new byte[buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; 367 System.arraycopy(buffer, 0, b, 0, buffer.length); 368 buffer = b; 369 } 370 } 371 372 /** 373 * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail 374 * bytes. Returns how many bytes were actually extracted. 375 * 376 * @param b 377 * byte[] array to extract the buffered data into. 378 * @param bPos 379 * position in byte[] array to start extraction at. 380 * @param bAvail 381 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). 382 * @return The number of bytes successfully extracted into the provided byte[] array. 383 */ 384 int readResults(final byte[] b, final int bPos, final int bAvail) { 385 if (buffer != null) { 386 final int len = Math.min(avail(), bAvail); 387 if (buffer != b) { 388 System.arraycopy(buffer, readPos, b, bPos, len); 389 readPos += len; 390 if (readPos >= pos) { 391 buffer = null; 392 } 393 } else { 394 // Re-using the original consumer's output array is only 395 // allowed for one round. 396 buffer = null; 397 } 398 return len; 399 } 400 return eof ? -1 : 0; 401 } 402 403 /** 404 * Sets the streaming buffer. This is a small optimization where we try to buffer directly to the consumer's output 405 * array for one round (if the consumer calls this method first) instead of starting our own buffer. 406 * 407 * @param out 408 * byte[] array to buffer directly to. 409 * @param outPos 410 * Position to start buffering into. 411 * @param outAvail 412 * Amount of bytes available for direct buffering. 413 */ 414 void setInitialBuffer(final byte[] out, final int outPos, final int outAvail) { 415 // We can re-use consumer's original output array under 416 // special circumstances, saving on some System.arraycopy(). 417 if (out != null && out.length == outAvail) { 418 buffer = out; 419 pos = outPos; 420 readPos = outPos; 421 } 422 } 423 424 /** 425 * <p> 426 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with 427 * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last 428 * remaining bytes (if not multiple of 3). 429 * </p> 430 * <p> 431 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. 432 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 433 * </p> 434 * 435 * @param in 436 * byte[] array of binary data to base64 encode. 437 * @param inPos 438 * Position to start reading data from. 439 * @param inAvail 440 * Amount of bytes available from input for encoding. 441 */ 442 void encode(final byte[] in, int inPos, final int inAvail) { 443 if (eof) { 444 return; 445 } 446 // inAvail < 0 is how we're informed of EOF in the underlying data we're 447 // encoding. 448 if (inAvail < 0) { 449 eof = true; 450 if (buffer == null || buffer.length - pos < encodeSize) { 451 resizeBuffer(); 452 } 453 switch (modulus) { 454 case 1 : 455 buffer[pos++] = encodeTable[(x >> 2) & MASK_6BITS]; 456 buffer[pos++] = encodeTable[(x << 4) & MASK_6BITS]; 457 // URL-SAFE skips the padding to further reduce size. 458 if (encodeTable == STANDARD_ENCODE_TABLE) { 459 buffer[pos++] = PAD; 460 buffer[pos++] = PAD; 461 } 462 break; 463 464 case 2 : 465 buffer[pos++] = encodeTable[(x >> 10) & MASK_6BITS]; 466 buffer[pos++] = encodeTable[(x >> 4) & MASK_6BITS]; 467 buffer[pos++] = encodeTable[(x << 2) & MASK_6BITS]; 468 // URL-SAFE skips the padding to further reduce size. 469 if (encodeTable == STANDARD_ENCODE_TABLE) { 470 buffer[pos++] = PAD; 471 } 472 break; 473 default: 474 break; // other values ignored 475 } 476 if (lineLength > 0 && pos > 0) { 477 System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length); 478 pos += lineSeparator.length; 479 } 480 } else { 481 for (int i = 0; i < inAvail; i++) { 482 if (buffer == null || buffer.length - pos < encodeSize) { 483 resizeBuffer(); 484 } 485 modulus = (++modulus) % 3; 486 int b = in[inPos++]; 487 if (b < 0) { 488 b += 256; 489 } 490 x = (x << 8) + b; 491 if (0 == modulus) { 492 buffer[pos++] = encodeTable[(x >> 18) & MASK_6BITS]; 493 buffer[pos++] = encodeTable[(x >> 12) & MASK_6BITS]; 494 buffer[pos++] = encodeTable[(x >> 6) & MASK_6BITS]; 495 buffer[pos++] = encodeTable[x & MASK_6BITS]; 496 currentLinePos += 4; 497 if (lineLength > 0 && lineLength <= currentLinePos) { 498 System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length); 499 pos += lineSeparator.length; 500 currentLinePos = 0; 501 } 502 } 503 } 504 } 505 } 506 507 /** 508 * <p> 509 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once 510 * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" 511 * call is not necessary when decoding, but it doesn't hurt, either. 512 * </p> 513 * <p> 514 * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are 515 * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, 516 * garbage-out philosophy: it will not check the provided data for validity. 517 * </p> 518 * <p> 519 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. 520 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 521 * </p> 522 * 523 * @param in 524 * byte[] array of ascii data to base64 decode. 525 * @param inPos 526 * Position to start reading data from. 527 * @param inAvail 528 * Amount of bytes available from input for encoding. 529 */ 530 void decode(final byte[] in, int inPos, final int inAvail) { 531 if (eof) { 532 return; 533 } 534 if (inAvail < 0) { 535 eof = true; 536 } 537 for (int i = 0; i < inAvail; i++) { 538 if (buffer == null || buffer.length - pos < decodeSize) { 539 resizeBuffer(); 540 } 541 final byte b = in[inPos++]; 542 if (b == PAD) { 543 // We're done. 544 eof = true; 545 break; 546 } 547 if (b >= 0 && b < DECODE_TABLE.length) { 548 final int result = DECODE_TABLE[b]; 549 if (result >= 0) { 550 modulus = (++modulus) % 4; 551 x = (x << 6) + result; 552 if (modulus == 0) { 553 buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS); 554 buffer[pos++] = (byte) ((x >> 8) & MASK_8BITS); 555 buffer[pos++] = (byte) (x & MASK_8BITS); 556 } 557 } 558 } 559 } 560 561 // Two forms of EOF as far as base64 decoder is concerned: actual 562 // EOF (-1) and first time '=' character is encountered in stream. 563 // This approach makes the '=' padding characters completely optional. 564 if (eof && modulus != 0) { 565 x = x << 6; 566 switch (modulus) { 567 case 2 : 568 x = x << 6; 569 buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS); 570 break; 571 case 3 : 572 buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS); 573 buffer[pos++] = (byte) ((x >> 8) & MASK_8BITS); 574 break; 575 default: 576 break; // other values ignored 577 } 578 } 579 } 580 581 /** 582 * Returns whether or not the <code>octet</code> is in the base 64 alphabet. 583 * 584 * @param octet 585 * The value to test 586 * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise. 587 * @since 1.4 588 */ 589 public static boolean isBase64(final byte octet) { 590 return octet == PAD || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1); 591 } 592 593 /** 594 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the 595 * method treats whitespace as valid. 596 * 597 * @param arrayOctet 598 * byte array to test 599 * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; 600 * false, otherwise 601 */ 602 public static boolean isArrayByteBase64(final byte[] arrayOctet) { 603 for (final byte element : arrayOctet) { 604 if (!isBase64(element) && !isWhiteSpace(element)) { 605 return false; 606 } 607 } 608 return true; 609 } 610 611 /** 612 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. 613 * 614 * @param arrayOctet 615 * byte array to test 616 * @return <code>true</code> if any byte is a valid character in the Base64 alphabet; false herwise 617 */ 618 private static boolean containsBase64Byte(final byte[] arrayOctet) { 619 for (final byte element : arrayOctet) 620 { 621 if (isBase64(element)) { 622 return true; 623 } 624 } 625 return false; 626 } 627 628 /** 629 * Encodes binary data using the base64 algorithm but does not chunk the output. 630 * 631 * @param binaryData 632 * binary data to encode 633 * @return byte[] containing Base64 characters in their UTF-8 representation. 634 */ 635 public static byte[] encodeBase64(final byte[] binaryData) { 636 return encodeBase64(binaryData, false); 637 } 638 639 /** 640 * Encodes binary data using the base64 algorithm into 76 character blocks separated by CRLF. 641 * <p> 642 * For a non-chunking version, see {@link #encodeBase64StringUnChunked(byte[])}. 643 * 644 * @param binaryData 645 * binary data to encode 646 * @return String containing Base64 characters. 647 * @since 1.4 648 */ 649 public static String encodeBase64String(final byte[] binaryData) { 650 return newStringUtf8(encodeBase64(binaryData, true)); 651 } 652 653 /** 654 * Encodes binary data using the base64 algorithm, without using chunking. 655 * <p> 656 * For a chunking version, see {@link #encodeBase64String(byte[])}. 657 * 658 * @param binaryData 659 * binary data to encode 660 * @return String containing Base64 characters. 661 * @since 3.2 662 */ 663 public static String encodeBase64StringUnChunked(final byte[] binaryData) { 664 return newStringUtf8(encodeBase64(binaryData, false)); 665 } 666 667 /** 668 * Encodes binary data using the base64 algorithm. 669 * 670 * @param binaryData 671 * binary data to encode 672 * @param useChunking whether to split the output into chunks 673 * @return String containing Base64 characters. 674 * @since 3.2 675 */ 676 public static String encodeBase64String(final byte[] binaryData, final boolean useChunking) { 677 return newStringUtf8(encodeBase64(binaryData, useChunking)); 678 } 679 680 /** 681 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The 682 * url-safe variation emits - and _ instead of + and / characters. 683 * 684 * @param binaryData 685 * binary data to encode 686 * @return byte[] containing Base64 characters in their UTF-8 representation. 687 * @since 1.4 688 */ 689 public static byte[] encodeBase64URLSafe(final byte[] binaryData) { 690 return encodeBase64(binaryData, false, true); 691 } 692 693 /** 694 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The 695 * url-safe variation emits - and _ instead of + and / characters. 696 * 697 * @param binaryData 698 * binary data to encode 699 * @return String containing Base64 characters 700 * @since 1.4 701 */ 702 public static String encodeBase64URLSafeString(final byte[] binaryData) { 703 return newStringUtf8(encodeBase64(binaryData, false, true)); 704 } 705 706 /** 707 * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks 708 * 709 * @param binaryData 710 * binary data to encode 711 * @return Base64 characters chunked in 76 character blocks 712 */ 713 public static byte[] encodeBase64Chunked(final byte[] binaryData) { 714 return encodeBase64(binaryData, true); 715 } 716 717 /** 718 * Decodes a String containing containing characters in the Base64 alphabet. 719 * 720 * @param pArray 721 * A String containing Base64 character data 722 * @return a byte array containing binary data 723 * @since 1.4 724 */ 725 public byte[] decode(final String pArray) { 726 return decode(getBytesUtf8(pArray)); 727 } 728 729 private byte[] getBytesUtf8(final String pArray) { 730 return pArray.getBytes(StandardCharsets.UTF_8); 731 } 732 733 /** 734 * Decodes a byte[] containing containing characters in the Base64 alphabet. 735 * 736 * @param pArray 737 * A byte array containing Base64 character data 738 * @return a byte array containing binary data 739 */ 740 public byte[] decode(final byte[] pArray) { 741 reset(); 742 if (pArray == null || pArray.length == 0) { 743 return pArray; 744 } 745 final long len = (pArray.length * 3) / 4; 746 final byte[] buf = new byte[(int) len]; 747 setInitialBuffer(buf, 0, buf.length); 748 decode(pArray, 0, pArray.length); 749 decode(pArray, 0, -1); // Notify decoder of EOF. 750 751 // Would be nice to just return buf (like we sometimes do in the encode 752 // logic), but we have no idea what the line-length was (could even be 753 // variable). So we cannot determine ahead of time exactly how big an 754 // array is necessary. Hence the need to construct a 2nd byte array to 755 // hold the final result: 756 757 final byte[] result = new byte[pos]; 758 readResults(result, 0, result.length); 759 return result; 760 } 761 762 /** 763 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 764 * 765 * @param binaryData 766 * Array containing binary data to encode. 767 * @param isChunked 768 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks 769 * @return Base64-encoded data. 770 * @throws IllegalArgumentException 771 * Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} 772 */ 773 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) { 774 return encodeBase64(binaryData, isChunked, false); 775 } 776 777 /** 778 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 779 * 780 * @param binaryData 781 * Array containing binary data to encode. 782 * @param isChunked 783 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks 784 * @param urlSafe 785 * if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters. 786 * @return Base64-encoded data. 787 * @throws IllegalArgumentException 788 * Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} 789 * @since 1.4 790 */ 791 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) { 792 return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE); 793 } 794 795 /** 796 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 797 * 798 * @param binaryData 799 * Array containing binary data to encode. 800 * @param isChunked 801 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks 802 * @param urlSafe 803 * if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters. 804 * @param maxResultSize 805 * The maximum result size to accept. 806 * @return Base64-encoded data. 807 * @throws IllegalArgumentException 808 * Thrown when the input array needs an output array bigger than maxResultSize 809 * @since 1.4 810 */ 811 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe, 812 final int maxResultSize) { 813 if (binaryData == null || binaryData.length == 0) { 814 return binaryData; 815 } 816 817 final long len = getEncodeLength(binaryData, isChunked ? CHUNK_SIZE : 0, 818 isChunked ? CHUNK_SEPARATOR : NetConstants.EMPTY_BTYE_ARRAY); 819 if (len > maxResultSize) { 820 throw new IllegalArgumentException("Input array too big, the output array would be bigger (" + len 821 + ") than the specified maxium size of " + maxResultSize); 822 } 823 824 final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe); 825 return b64.encode(binaryData); 826 } 827 828 /** 829 * Decodes a Base64 String into octets. 830 * 831 * @param base64String 832 * String containing Base64 data 833 * @return Array containing decoded data. 834 * @since 1.4 835 */ 836 public static byte[] decodeBase64(final String base64String) { 837 return new Base64().decode(base64String); 838 } 839 840 /** 841 * Decodes Base64 data into octets. 842 * 843 * @param base64Data 844 * Byte array containing Base64 data 845 * @return Array containing decoded data. 846 */ 847 public static byte[] decodeBase64(final byte[] base64Data) { 848 return new Base64().decode(base64Data); 849 } 850 851 /** 852 * Checks if a byte value is whitespace or not. 853 * 854 * @param byteToCheck 855 * the byte to check 856 * @return true if byte is whitespace, false otherwise 857 */ 858 private static boolean isWhiteSpace(final byte byteToCheck) { 859 switch (byteToCheck) { 860 case ' ' : 861 case '\n' : 862 case '\r' : 863 case '\t' : 864 return true; 865 default : 866 return false; 867 } 868 } 869 870 /** 871 * Encodes a byte[] containing binary data, into a String containing characters in the Base64 alphabet. 872 * 873 * @param pArray 874 * a byte array containing binary data 875 * @return A String containing only Base64 character data 876 * @since 1.4 877 */ 878 public String encodeToString(final byte[] pArray) { 879 return newStringUtf8(encode(pArray)); 880 } 881 882 private static String newStringUtf8(final byte[] encode) { 883 return new String(encode, StandardCharsets.UTF_8); 884 } 885 886 /** 887 * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet. 888 * 889 * @param pArray 890 * a byte array containing binary data 891 * @return A byte array containing only Base64 character data 892 */ 893 public byte[] encode(final byte[] pArray) { 894 reset(); 895 if (pArray == null || pArray.length == 0) { 896 return pArray; 897 } 898 final long len = getEncodeLength(pArray, lineLength, lineSeparator); 899 byte[] buf = new byte[(int) len]; 900 setInitialBuffer(buf, 0, buf.length); 901 encode(pArray, 0, pArray.length); 902 encode(pArray, 0, -1); // Notify encoder of EOF. 903 // Encoder might have resized, even though it was unnecessary. 904 if (buffer != buf) { 905 readResults(buf, 0, buf.length); 906 } 907 // In URL-SAFE mode we skip the padding characters, so sometimes our 908 // final length is a bit smaller. 909 if (isUrlSafe() && pos < buf.length) { 910 final byte[] smallerBuf = new byte[pos]; 911 System.arraycopy(buf, 0, smallerBuf, 0, pos); 912 buf = smallerBuf; 913 } 914 return buf; 915 } 916 917 /** 918 * Pre-calculates the amount of space needed to base64-encode the supplied array. 919 * 920 * @param pArray byte[] array which will later be encoded 921 * @param chunkSize line-length of the output (<= 0 means no chunking) between each 922 * chunkSeparator (e.g. CRLF). 923 * @param chunkSeparator the sequence of bytes used to separate chunks of output (e.g. CRLF). 924 * 925 * @return amount of space needed to encoded the supplied array. Returns 926 * a long since a max-len array will require Integer.MAX_VALUE + 33%. 927 */ 928 private static long getEncodeLength(final byte[] pArray, int chunkSize, final byte[] chunkSeparator) { 929 // base64 always encodes to multiples of 4. 930 chunkSize = (chunkSize / 4) * 4; 931 932 long len = (pArray.length * 4) / 3; 933 final long mod = len % 4; 934 if (mod != 0) { 935 len += 4 - mod; 936 } 937 if (chunkSize > 0) { 938 final boolean lenChunksPerfectly = len % chunkSize == 0; 939 len += (len / chunkSize) * chunkSeparator.length; 940 if (!lenChunksPerfectly) { 941 len += chunkSeparator.length; 942 } 943 } 944 return len; 945 } 946 947 // Implementation of integer encoding used for crypto 948 /** 949 * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature 950 * 951 * @param pArray 952 * a byte array containing base64 character data 953 * @return A BigInteger 954 * @since 1.4 955 */ 956 public static BigInteger decodeInteger(final byte[] pArray) { 957 return new BigInteger(1, decodeBase64(pArray)); 958 } 959 960 /** 961 * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature 962 * 963 * @param bigInt 964 * a BigInteger 965 * @return A byte array containing base64 character data 966 * @throws NullPointerException 967 * if null is passed in 968 * @since 1.4 969 */ 970 public static byte[] encodeInteger(final BigInteger bigInt) { 971 if (bigInt == null) { 972 throw new NullPointerException("encodeInteger called with null parameter"); 973 } 974 return encodeBase64(toIntegerBytes(bigInt), false); 975 } 976 977 /** 978 * Returns a byte-array representation of a <code>BigInteger</code> without sign bit. 979 * 980 * @param bigInt 981 * <code>BigInteger</code> to be converted 982 * @return a byte array representation of the BigInteger parameter 983 */ 984 static byte[] toIntegerBytes(final BigInteger bigInt) { 985 int bitlen = bigInt.bitLength(); 986 // round bitlen 987 bitlen = ((bitlen + 7) >> 3) << 3; 988 final byte[] bigBytes = bigInt.toByteArray(); 989 990 if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) { 991 return bigBytes; 992 } 993 // set up params for copying everything but sign bit 994 int startSrc = 0; 995 int len = bigBytes.length; 996 997 // if bigInt is exactly byte-aligned, just skip signbit in copy 998 if ((bigInt.bitLength() % 8) == 0) { 999 startSrc = 1; 1000 len--; 1001 } 1002 final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec 1003 final byte[] resizedBytes = new byte[bitlen / 8]; 1004 System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len); 1005 return resizedBytes; 1006 } 1007 1008 /** 1009 * Resets this Base64 object to its initial newly constructed state. 1010 */ 1011 private void reset() { 1012 buffer = null; 1013 pos = 0; 1014 readPos = 0; 1015 currentLinePos = 0; 1016 modulus = 0; 1017 eof = false; 1018 } 1019 1020 // Getters for use in testing 1021 1022 int getLineLength() { 1023 return lineLength; 1024 } 1025 1026 byte[] getLineSeparator() { 1027 return lineSeparator.clone(); 1028 } 1029}