Changeset 8198c839


Ignore:
Timestamp:
Jul 3, 2014 1:03:56 PM (7 years ago)
Author:
zzz <zzz@…>
Branches:
master
Children:
aa3d367
Parents:
75ff798
Message:
  • Base64:
    • Catch numerous decoding errors that were previously misdecoded (ticket #1318)
    • Improve decoding efficiency, reduce copies
    • encode(String) now uses UTF-8 encoding
    • decode() now accepts short strings without trailing '='
    • whitespace in decode will now cause an error, was previously ignored
    • Cleanups
File:
1 edited

Legend:

Unmodified
Added
Removed
  • core/java/src/net/i2p/data/Base64.java

    r75ff798 r8198c839  
    4343
    4444    /**
     45     *  Output will be a multiple of 4 chars, including 0-2 trailing '='
     46     *  As of 0.9.14, encodes the UTF-8 encoding of source. Prior to that, used the platform's encoding.
     47     *
    4548     *  @param source if null will return ""
    4649     */
    4750    public static String encode(String source) {
    48         return (source != null ? encode(source.getBytes()) : "");
    49     }
    50 
    51     /**
     51        return (source != null ? encode(DataHelper.getUTF8(source)) : "");
     52    }
     53
     54    /**
     55     *  Output will be a multiple of 4 chars, including 0-2 trailing '='
    5256     *  @param source if null will return ""
    5357     */
     
    5761
    5862    /**
     63     *  Output will be a multiple of 4 chars, including 0-2 trailing '='
    5964     *  @param source if null will return ""
    6065     */
     
    6469
    6570    /**
     71     *  Output will be a multiple of 4 chars, including 0-2 trailing '='
    6672     *  @param source if null will return ""
    6773     *  @param useStandardAlphabet Warning, must be false for I2P compatibility
     
    7278
    7379    /**
     80     *  Output will be a multiple of 4 chars, including 0-2 trailing '='
    7481     *  @param source if null will return ""
    7582     *  @param useStandardAlphabet Warning, must be false for I2P compatibility
     
    8087
    8188    /**
    82      *  Decodes data from Base64 notation.
     89     *  Decodes data from Base64 notation using the I2P alphabet.
     90     *
     91     *  As of 0.9.14, does not require trailing '=' if remaining bits are zero.
     92     *  Prior to that, trailing 1, 2, or 3 chars were ignored.
     93     *
     94     *  As of 0.9.14, trailing garbage after an '=' will cause an error.
     95     *  Prior to that, it was ignored.
     96     *
     97     *  As of 0.9.14, whitespace will cause an error.
     98     *  Prior to that, it was ignored.
     99     *
    83100     *  @param s Base 64 encoded string using the I2P alphabet A-Z, a-z, 0-9, -, ~
    84101     *  @return the decoded data, null on error
     
    123140     * Translates a Base64 value to either its 6-bit reconstruction value
    124141     * or a negative number indicating some other meaning.
     142     * As of 0.9.14 this is the decoding for the I2P alphabet. See safeDecode().
    125143     **/
    126144    private final static byte[] DECODABET = { -9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal  0 -  8
     
    132150                                             -5, // Whitespace: Space
    133151                                             -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 33 - 42
    134                                              62, // Plus sign at decimal 43
    135                                              -9, -9, -9, // Decimal 44 - 46
    136                                              63, // Slash at decimal 47
     152                                             //62, -9, -9, -9, 63, // + , - . / (43-47) NON-I2P
     153                                             -9, -9, 62, -9, -9,   // + , - . / (43-47) I2P
    137154                                             52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // Numbers zero through nine
    138155                                             -9, -9, -9, // Decimal 58 - 60
     
    144161                                             26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, // Letters 'a' through 'm'
    145162                                             39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // Letters 'n' through 'z'
    146                                              -9, -9, -9, -9 // Decimal 123 - 126
    147     /*,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 127 - 139
     163                                             //-9, -9, -9, -9 // Decimal 123 - 126 (126 is '~') NON-I2P
     164                                             -9, -9, -9, 63   // Decimal 123 - 126 (126 is '~') I2P
     165    ,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 127 - 139
    148166     -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 140 - 152
    149167     -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 153 - 165
     
    154172     -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 218 - 230
    155173     -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 231 - 243
    156      -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9         // Decimal 244 - 255 */
     174     -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9         // Decimal 244 - 255
    157175    };
     176
    158177
    159178    private final static byte WHITE_SPACE_ENC = -5; // Indicates white space in encoding
     
    168187        if (args.length == 0) {
    169188            help();
    170             return;
    171189        }
    172190        runApp(args);
     
    176194        String cmd = args[0].toLowerCase(Locale.US);
    177195        if ("encodestring".equals(cmd)) {
    178             System.out.println(encode(args[1].getBytes()));
     196            if (args.length != 2)
     197                help();
     198            System.out.println(encode(DataHelper.getUTF8(args[1])));
    179199            return;
    180200        }
    181201        if ("decodestring".equals(cmd)) {
     202            if (args.length != 2)
     203                help();
    182204            byte[] dec = decode(args[1]);
    183205            if (dec != null) {
     
    186208                } catch (IOException ioe) {
    187209                    System.err.println("output error " + ioe);
     210                    System.exit(1);
    188211                }
    189212            } else {
    190213                System.err.println("decode error");
     214                System.exit(1);
    191215            }
    192216            return;
     217        }
     218        if ("test".equals(cmd)) {
     219            System.err.println("test disabled");
     220            System.exit(1);
     221        }
     222        if (!("encode".equals(cmd) || "decode".equals(cmd))) {
     223            System.err.println("unknown command " + cmd);
     224            System.exit(1);
    193225        }
    194226        InputStream in = System.in;
     
    203235            if ("encode".equals(cmd)) {
    204236                encode(in, out);
    205                 return;
    206             }
    207             if ("decode".equals(cmd)) {
     237            } else {
    208238                decode(in, out);
    209                 return;
    210239            }
    211240        } catch (IOException ioe) {
     
    239268    }
    240269
     270    /** exits 1, never returns */
    241271    private static void help() {
    242         System.out.println("Syntax: Base64 encode <inFile> <outFile>");
    243         System.out.println("or    : Base64 encode <inFile>");
    244         System.out.println("or    : Base64 encode");
    245         System.out.println("or    : Base64 decode <inFile> <outFile>");
    246         System.out.println("or    : Base64 decode <inFile>");
    247         System.out.println("or    : Base64 decode");
    248         System.out.println("or    : Base64 encodestring 'string to encode'");
    249         System.out.println("or    : Base64 decodestring 'string to decode'");
    250         System.out.println("or    : Base64 test");
     272        System.err.println("Usage: Base64 encode <inFile> <outFile>");
     273        System.err.println("       Base64 encode <inFile>");
     274        System.err.println("       Base64 encode (stdin to stdout)");
     275        System.err.println("       Base64 decode <inFile> <outFile>");
     276        System.err.println("       Base64 decode <inFile>");
     277        System.err.println("       Base64 decode (stdin to stdout)");
     278        System.err.println("       Base64 encodestring 'string to encode'");
     279        System.err.println("       Base64 decodestring 'string to decode'");
     280        System.err.println("       Base64 test");
     281        System.exit(1);
    251282    }
    252283
     
    459490    private static byte[] safeDecode(String source, boolean useStandardAlphabet) {
    460491        if (source == null) return null;
    461         String toDecode = null;
     492        String toDecode;
    462493        if (useStandardAlphabet) {
     494            //toDecode = source;
     495            toDecode = source.replace('/', '~');
     496            toDecode = toDecode.replace('+', '-');
     497        } else {
     498            //toDecode = source.replace('~', '/');
     499            //toDecode = toDecode.replace('-', '+');
    463500            toDecode = source;
    464         } else {
    465             toDecode = source.replace('~', '/');
    466             toDecode = toDecode.replace('-', '+');
    467501        }
    468502        return standardDecode(toDecode);
     
    601635     * @param destination the array to hold the conversion
    602636     * @param destOffset the index where output will be put
    603      * @return the number of decoded bytes converted
     637     * @return the number of decoded bytes converted 1-3, or -1 on error, never zero
    604638     * @since 1.3
    605639     */
    606640    private static int decode4to3(byte[] source, int srcOffset, byte[] destination, int destOffset) {
     641        byte decode0 = DECODABET[source[srcOffset++]];
     642        byte decode1 = DECODABET[source[srcOffset++]];
     643        if (decode0 < 0 || decode1 < 0)
     644            return -1;
     645
    607646        // Example: Dk==
    608         if (source[srcOffset + 2] == EQUALS_SIGN) {
    609             // Two ways to do the same thing. Don't know which way I like best.
    610             //int outBuff =   ( ( DECODABET[ source[ srcOffset    ] ] << 24 ) >>>  6 )
    611             //              | ( ( DECODABET[ source[ srcOffset + 1] ] << 24 ) >>> 12 );
    612             int outBuff = ((DECODABET[source[srcOffset]] & 0xFF) << 18)
    613                           | ((DECODABET[source[srcOffset + 1]] & 0xFF) << 12);
    614 
    615             destination[destOffset] = (byte) (outBuff >>> 16);
     647        if (source[srcOffset] == EQUALS_SIGN) {
     648            if (source[srcOffset + 1] != EQUALS_SIGN)
     649                return -1;
     650            // verify no extra bits
     651            if ((decode1 & 0x0f) != 0)
     652                return -1;
     653            int outBuff = (decode0 << 18)
     654                          | (decode1 << 12);
     655            destination[destOffset] = (byte) (outBuff >> 16);
    616656            return 1;
    617657        }
    618658
    619659        // Example: DkL=
    620         else if (source[srcOffset + 3] == EQUALS_SIGN) {
    621             // Two ways to do the same thing. Don't know which way I like best.
    622             //int outBuff =   ( ( DECODABET[ source[ srcOffset     ] ] << 24 ) >>>  6 )
    623             //              | ( ( DECODABET[ source[ srcOffset + 1 ] ] << 24 ) >>> 12 )
    624             //              | ( ( DECODABET[ source[ srcOffset + 2 ] ] << 24 ) >>> 18 );
    625             int outBuff = ((DECODABET[source[srcOffset]] & 0xFF) << 18)
    626                           | ((DECODABET[source[srcOffset + 1]] & 0xFF) << 12)
    627                           | ((DECODABET[source[srcOffset + 2]] & 0xFF) << 6);
    628 
    629             destination[destOffset] = (byte) (outBuff >>> 16);
    630             destination[destOffset + 1] = (byte) (outBuff >>> 8);
     660        else if (source[srcOffset + 1] == EQUALS_SIGN) {
     661            byte decode2 = DECODABET[source[srcOffset]];
     662            if (decode2 < 0)
     663                return -1;
     664            // verify no extra bits
     665            if ((decode2 & 0x03) != 0)
     666                return -1;
     667            int outBuff = (decode0 << 18)
     668                          | (decode1 << 12)
     669                          | (decode2 << 6);
     670            destination[destOffset++] = (byte) (outBuff >> 16);
     671            destination[destOffset] = (byte) (outBuff >> 8);
    631672            return 2;
    632673        }
     
    634675        // Example: DkLE
    635676        else {
    636             try {
    637                 // Two ways to do the same thing. Don't know which way I like best.
    638                 //int outBuff =   ( ( DECODABET[ source[ srcOffset     ] ] << 24 ) >>>  6 )
    639                 //              | ( ( DECODABET[ source[ srcOffset + 1 ] ] << 24 ) >>> 12 )
    640                 //              | ( ( DECODABET[ source[ srcOffset + 2 ] ] << 24 ) >>> 18 )
    641                 //              | ( ( DECODABET[ source[ srcOffset + 3 ] ] << 24 ) >>> 24 );
    642                 int outBuff = ((DECODABET[source[srcOffset]] & 0xFF) << 18)
    643                               | ((DECODABET[source[srcOffset + 1]] & 0xFF) << 12)
    644                               | ((DECODABET[source[srcOffset + 2]] & 0xFF) << 6)
    645                               | ((DECODABET[source[srcOffset + 3]] & 0xFF));
    646 
    647                 destination[destOffset] = (byte) (outBuff >> 16);
    648                 destination[destOffset + 1] = (byte) (outBuff >> 8);
    649                 destination[destOffset + 2] = (byte) (outBuff);
    650 
    651                 return 3;
    652             } catch (Exception e) {
    653                 System.out.println("" + source[srcOffset] + ": " + (DECODABET[source[srcOffset]]));
    654                 System.out.println("" + source[srcOffset + 1] + ": " + (DECODABET[source[srcOffset + 1]]));
    655                 System.out.println("" + source[srcOffset + 2] + ": " + (DECODABET[source[srcOffset + 2]]));
    656                 System.out.println("" + source[srcOffset + 3] + ": " + (DECODABET[source[srcOffset + 3]]));
     677            byte decode2 = DECODABET[source[srcOffset++]];
     678            byte decode3 = DECODABET[source[srcOffset]];
     679            if (decode2 < 0 || decode3 < 0)
    657680                return -1;
    658             } //e nd catch
     681            int outBuff = (decode0 << 18)
     682                          | (decode1 << 12)
     683                          | (decode2 << 6)
     684                          | decode3;
     685            destination[destOffset++] = (byte) (outBuff >> 16);
     686            destination[destOffset++] = (byte) (outBuff >> 8);
     687            destination[destOffset] = (byte) (outBuff);
     688            return 3;
    659689        }
    660690    } // end decodeToBytes
     
    662692    /**
    663693     * Decodes data from Base64 notation.
     694     * As of 0.9.14, this uses the I2P alphabet, so it is not "standard".
    664695     *
    665696     * @param s the string to decode
     
    668699     */
    669700    private static byte[] standardDecode(String s) {
    670         byte[] bytes = new byte[s.length()];
    671         for (int i = 0; i < bytes.length; i++)
    672             bytes[i] = (byte)(s.charAt(i) & 0xFF);
     701        // We use getUTF8() instead of getASCII() so we may verify
     702        // there's no UTF-8 in there.
     703        byte[] bytes = DataHelper.getUTF8(s);
     704        if (bytes.length != s.length())
     705            return null;
    673706        return decode(bytes, 0, bytes.length);
    674707    } // end decode
     
    679712     * Equivalent to calling
    680713     * <code>new String( decode( s ) )</code>
    681      * WARNING this uses the locale's encoding, it may not be what you want.
     714     *
     715     * As of 0.9.14, decodes as UTF-8. Prior to that, it used the platform's encoding.
     716     * For best results, decoded data should be 7 bit.
     717     *
     718     * As of 0.9.14, does not require trailing '=' if remaining bits are zero.
     719     * Prior to that, trailing 1, 2, or 3 chars were ignored.
     720     *
     721     * As of 0.9.14, trailing garbage after an '=' will cause an error.
     722     * Prior to that, it was ignored.
     723     *
     724     * As of 0.9.14, whitespace will cause an error.
     725     * Prior to that, it was ignored.
    682726     *
    683727     * @param s the string to decode
    684      * @return The data as a string
     728     * @return The data as a string, or null on error
    685729     * @since 1.4
    686      * @throws NPE on error?
    687730     */
    688731    public static String decodeToString(String s) {
    689         return new String(decode(s));
     732        byte[] b = decode(s);
     733        if (b == null)
     734            return null;
     735        return DataHelper.getUTF8(b);
    690736    } // end decodeToString
    691737
     
    693739     * Decodes Base64 content in byte array format and returns
    694740     * the decoded byte array.
     741     *
     742     * As of 0.9.14, does not require trailing '=' if remaining bits are zero.
     743     * Prior to that, trailing 1, 2, or 3 chars were ignored.
     744     *
     745     * As of 0.9.14, trailing garbage after an '=' will cause an error.
     746     * Prior to that, it was ignored.
     747     *
     748     * As of 0.9.14, whitespace will cause an error.
     749     * Prior to that, it was ignored.
    695750     *
    696751     * @param source The Base64 encoded data
     
    702757    private static byte[] decode(byte[] source, int off, int len) {
    703758        int len34 = len * 3 / 4;
    704         byte[] outBuff = new byte[len34]; // Upper limit on size of output
     759        byte[] outBuff = new byte[len34]; // size of output
    705760        int outBuffPosn = 0;
    706761
    707         byte[] b4 = new byte[4];
    708         int b4Posn = 0;
    709         int i = 0;
    710         byte sbiCrop = 0;
    711         byte sbiDecode = 0;
    712         for (i = 0; i < len; i++) {
    713             sbiCrop = (byte) (source[i] & 0x7f); // Only the low seven bits
    714             sbiDecode = DECODABET[sbiCrop];
    715 
    716             if (sbiDecode >= WHITE_SPACE_ENC) // White space, Equals sign or better
    717             {
    718                 if (sbiDecode >= EQUALS_SIGN_ENC) {
    719                     b4[b4Posn++] = sbiCrop;
    720                     if (b4Posn > 3) {
    721                         outBuffPosn += decode4to3(b4, 0, outBuff, outBuffPosn);
    722                         b4Posn = 0;
    723 
    724                         // If that was the equals sign, break out of 'for' loop
    725                         if (sbiCrop == EQUALS_SIGN) break;
    726                     } // end if: quartet built
    727 
    728                 } // end if: equals sign or better
    729 
    730             } // end if: white space, equals sign or better
    731             else {
    732                 //_log.warn("Bad Base64 input character at " + i + ": " + source[i] + "(decimal)");
     762        int i = off;
     763        int end = off + len;
     764        int converted = 0;
     765        while (i + 3 < end) {
     766            converted = decode4to3(source, i, outBuff, outBuffPosn);
     767            if (converted < 0)
    733768                return null;
    734             } // end else:
    735         } // each input character
    736 
     769            outBuffPosn += converted;
     770            i += 4;
     771            if (converted < 3)
     772                break;
     773        }
     774
     775        // process any remaining without '='
     776        int remaining = end - i;
     777        if (remaining > 0) {
     778            if (converted > 0 && converted < 3)
     779                return null;
     780            if (remaining == 1 || remaining > 3)
     781                return null;
     782            byte[] b4 = new byte[4];
     783            b4[0] = source[i++];
     784            b4[1] = source[i++];
     785            if (remaining == 3)
     786                b4[2] = source[i];
     787            else
     788                b4[2] = EQUALS_SIGN;
     789            b4[3] = EQUALS_SIGN;
     790            converted = decode4to3(b4, 0, outBuff, outBuffPosn);
     791            if (converted < 0)
     792                return null;
     793            outBuffPosn += converted;
     794        }
     795
     796        // don't copy unless we have to
     797        if (outBuffPosn == outBuff.length)
     798            return outBuff;
     799        // and we shouldn't ever... would have returned null before
    737800        byte[] out = new byte[outBuffPosn];
    738801        System.arraycopy(outBuff, 0, out, 0, outBuffPosn);
Note: See TracChangeset for help on using the changeset viewer.