Code-Only: Arbitrary alphabet encoding (aka BaseN encoding) for base2 through base36.

/*
    Copyright (c) 2004 DigiTec Web Consultants, LLC.  All rights reserved.

    The use of this software is for test and performance purposes only.
    You may not use this software in any commercial applications without
    the express permission of the copyright holder.  You may add to or
    modify the code contained here-in to cause it to run slower without
    contacting the copyright holder, however, any attempts to make this
    code run faster should be documented on:
   
    http://weblogs.asp.net/justin_rogers/articles/253641.aspx
   
    I reserve the right to change or modify the publicly available version
    of this code at any time.  I will not provide version protection, so
    if you have reliance on a particular build of this software, then make
    your own back-ups.
   
    You must laugh, at least a little, when reading this licensing agreement,
    unless of course you don't have a sense of humor.  In all seriousness,
    excluding the laughter, laughter in itself does not void this license
    agreement, nor compromise it's ability to legally bind you.
   
    You must not remove this notice, or any other, from this software.
*/
/*
    VERSION 1.1, November 7th, 2004
        Renames -
            Integer has been renamed to Int
           
        Notes -
            Added functions for all base types
                byte, sbyte, short, ushort, int, uint, long, ulong
   
   
    VERSION 1.0, November 7th, 2004
        Integer2Base() -
            Converts an integer value to a given alphabet where the length
            of the alphabet is assumed to be the base. Handles 0 and all
            positive integers.
           
        Integer2BasePow2() -
            Adds performance increases to processing pow 2 alphabets.
            This results in ~10% more raw processing speed. For specific
            bases additional enhancements are possible as well.
       
        Base2Integer() -
            Processes valid numerical strings into integers based on
            a given alphabet. Partially validates input against the supplied
            alphabet, but may fail to process a number of possibilities.
           
            No overflow checking or negative number processing.

        Base2IntegerPow2() -
            Uses pow 2 alphabet enhancements. This results in a modest
            performance gain, but is in place for parity with Integer2BasePow2.
           
        Main() -
            Round Trip Validation
            Parity Parsing Between Pow2 and non Pow2 Functions
            Native Parsing for int parsing and conversion
           
        Notes -
            Currently non sequential arrays are not processed because of the
            binary searc reliance. We could improve the method to perform
            more complex conversions (Base64) by allowing unordered key
            alphabets.
           
            Optional: Convert methods to use a fixed 256 character array
            where character indexes supply a value rather than using indexes
            as values. Increased memory storage, but better performance on
            unordered key alphabets.
*/

using System;
using System.Globalization;

public class BaseParser {
    private static char[] base2 = new char[] { '0', '1' };
    private static char[] base7 = new char[] { '0', '1', '2', '3', '4', '5', '6' };
    private static char[] base8 = new char[] { '0', '1', '2', '3', '4', '5', '6', '7' };
    private static char[] base16 = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
    private static char[] base26 = new char[] {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' };
    private static char[] base32 = new char[] {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
        'G', 'H', 'J', 'K', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Y', 'Z' };
    private static char[] base36 = new char[] {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
        'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' };

    private static void Main(string[] args) {
        Console.WriteLine("Validating Round Trip: {0}", ValidateRoundTrip());
        Console.WriteLine("Validating Parity Parsing: {0}", ValidateParityParsing());
        Console.WriteLine("Validating Native Parsing: {0}", ValidateNativeParsing());
    }
   
    private static void Performance() {
        int i = 0; int iterations = 1000000;
       
        // Warm-up
        Int2Base(i, base8);
        Int2BasePow2(i, base8);
       
        DateTime start, end;
       
        start = DateTime.Now;
        for(i = 0; i < iterations; i++) {
            Int2Base(i, base8);
        }
        end = DateTime.Now;
        Console.WriteLine("{0}: {1}", "Integer2Base", end - start);

        start = DateTime.Now;
        for(i = 0; i < iterations; i++) {
            Int2BasePow2(i, base8);
        }
        end = DateTime.Now;
        Console.WriteLine("{0}: {1}", "Integer2BasePow2", end - start);
    }
   
    private static bool ValidateNativeParsing() {
        int iterations = 10000;
       
        for(int i = 0; i < iterations; i++) {
            if ( Int2Base(i, base16) != i.ToString("X") ) { return false; }
            if ( Int2BasePow2(i, base16) != i.ToString("X") ) { return false; }
           
            if ( Base2Int(i.ToString("X"), base16) != int.Parse(i.ToString("X"), NumberStyles.HexNumber) ) { return false; }
            if ( Base2Int(Int2Base(i, base16), base16) != int.Parse(Int2Base(i, base16), NumberStyles.HexNumber) ) { return false; }

            if ( Base2IntPow2(i.ToString("X"), base16) != int.Parse(i.ToString("X"), NumberStyles.HexNumber) ) { return false; }
            if ( Base2IntPow2(Int2BasePow2(i, base16), base16) != int.Parse(Int2BasePow2(i, base16), NumberStyles.HexNumber) ) { return false; }
        }
       
        return true;
    }

    private static bool ValidateParityParsing() {
        int iterations = 10000;

        for(int i = 0; i < iterations; i++) {
            if ( Int2Base(i, base2) != Int2BasePow2(i, base2) ) { return false; }
            if ( Int2Base(i, base8) != Int2BasePow2(i, base8) ) { return false; }
            if ( Int2Base(i, base16) != Int2BasePow2(i, base16) ) { return false; }

            if ( Base2Int(Int2Base(i, base2), base2) != Base2IntPow2(Int2BasePow2(i, base2), base2) ) { return false; }
            if ( Base2Int(Int2Base(i, base8), base8) != Base2IntPow2(Int2BasePow2(i, base8), base8) ) { return false; }
            if ( Base2Int(Int2Base(i, base16), base16) != Base2IntPow2(Int2BasePow2(i, base16), base16) ) { return false; }
           
            if ( Base2Int(Int2BasePow2(i, base2), base2) != Base2IntPow2(Int2Base(i, base2), base2) ) { return false; }
            if ( Base2Int(Int2BasePow2(i, base8), base8) != Base2IntPow2(Int2Base(i, base8), base8) ) { return false; }
            if ( Base2Int(Int2BasePow2(i, base16), base16) != Base2IntPow2(Int2Base(i, base16), base16) ) { return false; }
        }
       
        return true;
    }
   
    private static bool ValidateRoundTrip() {
        int iterations = 10000;

        for(int i = 0; i < iterations; i++) {
            if ( i != Base2Int(Int2Base(i, base2), base2) ) { return false; }
            if ( i != Base2Int(Int2Base(i, base7), base7) ) { return false; }
            if ( i != Base2Int(Int2Base(i, base8), base8) ) { return false; }
            if ( i != Base2Int(Int2Base(i, base16), base16) ) { return false; }
            if ( i != Base2IntPow2(Int2BasePow2(i, base2), base2) ) { return false; }
            if ( i != Base2IntPow2(Int2BasePow2(i, base8), base8) ) { return false; }
            if ( i != Base2IntPow2(Int2BasePow2(i, base16), base16) ) { return false; }
        }
       
        return true;
    }

    // Alphabet should be in base numerical order such that:
    //
    // A = {n0, n1, ... , n(m-1)}
    // m = expected base
    // for all n(i): n(i-1) < n(i) < n(i+1)
    // for all n(i): n(j) = n(i) iff i = j
    //
    // val(n(i)) == i
    //
    // The last rule states that the index of the character is
    // the numerical value of the character as used for
    // conversion. The prior rules establish that the alphabet
    // is sorted and that the index uniquely identifies the
    // character and the value... The rules aren't formal proof
    // worthy and are barely math, so don't worry about them too
    // much.
    public static string Int2Base(int value, char[] alphabet) { return ULong2Base((ulong) value, alphabet); }
    public static string UInt2Base(uint value, char[] alphabet) { return ULong2Base((ulong) value, alphabet); }
    public static string Byte2Base(byte value, char[] alphabet) { return ULong2Base((ulong) value, alphabet); }
    public static string SByte2Base(sbyte value, char[] alphabet) { return ULong2Base((ulong) value, alphabet); }
    public static string Short2Base(short value, char[] alphabet) { return ULong2Base((ulong) value, alphabet); }
    public static string UShort2Base(ushort value, char[] alphabet) { return ULong2Base((ulong) value, alphabet); }
    public static string Long2Base(long value, char[] alphabet) { return ULong2Base((ulong) value, alphabet); }
    public static string ULong2Base(ulong value, char[] alphabet) {
        ulong b = (ulong) alphabet.Length;
        string conversion = "";
       
        do {
            conversion = alphabet[value%b] + conversion;
            value /= b;
        } while(value > 0);
       
        return conversion;
    }

    public static string Int2BasePow2(int value, char[] alphabet) { return ULong2BasePow2((ulong) value, alphabet); }
    public static string UInt2BasePow2(uint value, char[] alphabet) { return ULong2BasePow2((ulong) value, alphabet); }
    public static string Byte2BasePow2(byte value, char[] alphabet) { return ULong2BasePow2((ulong) value, alphabet); }
    public static string SByte2BasePow2(sbyte value, char[] alphabet) { return ULong2BasePow2((ulong) value, alphabet); }
    public static string Short2BasePow2(short value, char[] alphabet) { return ULong2BasePow2((ulong) value, alphabet); }
    public static string UShort2BasePow2(ushort value, char[] alphabet) { return ULong2BasePow2((ulong) value, alphabet); }
    public static string Long2BasePow2(long value, char[] alphabet) { return ULong2BasePow2((ulong) value, alphabet); }
    public static string ULong2BasePow2(ulong value, char[] alphabet) {
        ulong b = (ulong) alphabet.Length;
        ulong bitMask = b - 1;
        int bits = (int) Math.Ceiling(Math.Log(b, 2));
        string conversion = "";
       
        do {
            conversion = alphabet[value&bitMask] + conversion;
            value >>= bits;
        } while(value > 0);
       
        return conversion;
    }
   
    public static byte Base2Byte(string value, char[] alphabet) { return (byte) Base2ULong(value, alphabet); }
    public static sbyte Base2SByte(string value, char[] alphabet) { return (sbyte) Base2ULong(value, alphabet); }
    public static short Base2Short(string value, char[] alphabet) { return (short) Base2ULong(value, alphabet); }
    public static ushort Base2UShort(string value, char[] alphabet) { return (ushort) Base2ULong(value, alphabet); }
    public static int Base2Int(string value, char[] alphabet) { return (int) Base2ULong(value, alphabet); }
    public static uint Base2UInt(string value, char[] alphabet) { return (uint) Base2ULong(value, alphabet); }
    public static long Base2Long(string value, char[] alphabet) { return (long) Base2ULong(value, alphabet); }
    public static ulong Base2ULong(string value, char[] alphabet) {
        ulong b = (ulong) alphabet.Length;
        ulong conversion = 0;
       
        for(int i = 0; i < value.Length; i++) {
            int index = Array.BinarySearch(alphabet, value[i]);
            if ( index < 0 ) { return 0; }
           
            conversion = (conversion*b) + (ulong) index;
        }
       
        return conversion;
    }

    public static byte Base2BytePow2(string value, char[] alphabet) { return (byte) Base2ULongPow2(value, alphabet); }
    public static sbyte Base2SBytePow2(string value, char[] alphabet) { return (sbyte) Base2ULongPow2(value, alphabet); }
    public static short Base2ShortPow2(string value, char[] alphabet) { return (short) Base2ULongPow2(value, alphabet); }
    public static ushort Base2UShortPow2(string value, char[] alphabet) { return (ushort) Base2ULongPow2(value, alphabet); }
    public static int Base2IntPow2(string value, char[] alphabet) { return (int) Base2ULongPow2(value, alphabet); }
    public static uint Base2UIntPow2(string value, char[] alphabet) { return (uint) Base2ULongPow2(value, alphabet); }
    public static long Base2LongPow2(string value, char[] alphabet) { return (long) Base2ULongPow2(value, alphabet); }
    public static ulong Base2ULongPow2(string value, char[] alphabet) {
        ulong b = (ulong) alphabet.Length;
        int bits = (int) Math.Ceiling(Math.Log(b, 2));
        ulong conversion = 0;
       
        for(int i = 0; i < value.Length; i++) {
            int index = Array.BinarySearch(alphabet, value[i]);
            if ( index < 0 ) { return 0; }
           
            conversion = (conversion<<bits) + (ulong) index;
        }

        return conversion;
    }
}

Published Sunday, November 07, 2004 3:14 PM by Justin Rogers
Filed under:

Comments

Monday, August 27, 2007 10:54 PM by Justin Rodgers

# re: Code-Only: Arbitrary alphabet encoding (aka BaseN encoding) for base2 through base36.

you suck balls

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

Leave a Comment

(required) 
(required) 
(optional)
(required)