Sunday, February 13, 2005

javascript compactor in java

To my astonishment, I found out that there are very few free JavaScript compressors out there, and most of them don't actually work. I came across Mike Hall's Javascript Crunchinator, a JavaScript compressor which, being written in JavaScript itself, is very slow and didn't work for my complex sample script. The other compressor worth mentioning is a Perl snippet written by Mihai Bazon. I decided I wanted one written in Java instead, so I sat down and cranked out the following, just so that you have one more option...

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Compacts JavaScript source text by removing comments, collapsing every run
 * of whitespace (including line breaks) into a single space, and dropping the
 * spaces that sit next to operators, parentheses, brackets and braces.
 *
 * <p>Limitations:
 * <ul>
 *   <li>All statements (including function expressions assigned to
 *       variables) must be terminated by semicolons, because line breaks are
 *       not preserved.</li>
 *   <li>Regular-expression literals are not recognised; a regex literal that
 *       contains quotes or a double slash may be mangled.</li>
 *   <li>If you read the script line by line with {@code readLine()}, append
 *       the newline again before passing the text in, otherwise line comments
 *       will swallow the following lines.</li>
 * </ul>
 */
public class JavascriptCompactor {

    /**
     * Matches, in a single left-to-right scan, a double-quoted string, a
     * single-quoted string, a block comment, or a line comment. Scanning all
     * four together guarantees that whichever construct starts first wins, so
     * a quote inside a comment or a comment marker inside a string is never
     * misread. The string alternatives use the "unrolled loop" form to avoid
     * deep regex recursion on long literals.
     */
    private static final Pattern STRING_OR_COMMENT = Pattern.compile(
            "\"[^\"\\\\]*(?:\\\\[\\s\\S][^\"\\\\]*)*\""
            + "|'[^'\\\\]*(?:\\\\[\\s\\S][^'\\\\]*)*'"
            + "|/\\*[\\s\\S]*?\\*/"
            + "|//[^\\r\\n]*");

    /** Any run of whitespace, line terminators included. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Delimiter wrapped around the index of a protected string literal. The
     * control character U+0001 is neither whitespace nor an operator and
     * cannot be part of a JavaScript identifier, so placeholders survive the
     * compaction passes untouched and cannot clash with real identifiers.
     */
    private static final String MARK = "\u0001";

    /** Matches a placeholder produced for a protected string literal. */
    private static final Pattern PLACEHOLDER = Pattern.compile("\\x01\\d+\\x01");

    /**
     * Characters next to which a space is redundant. This is the same set the
     * original hex-escaped character class listed.
     */
    private static final String OPERATORS = "!%&()*+,-/:;<=>?[]\\{|}~";

    /**
     * Returns a compacted copy of the given JavaScript source.
     *
     * @param sCode the JavaScript source text; must not be {@code null}
     * @return the source with comments removed and redundant whitespace
     *         stripped; string literals are reproduced verbatim
     * @throws NullPointerException if {@code sCode} is {@code null}
     */
    public static String compactJSCode(String sCode) {
        // Pass 1: swap string literals for placeholders and comments for a
        // single space (a comment separates tokens just like whitespace does).
        HashMap<String, String> literals = new HashMap<String, String>();
        Matcher scanner = STRING_OR_COMMENT.matcher(sCode);
        // Matcher.appendReplacement only accepts StringBuffer on older JDKs.
        StringBuffer stripped = new StringBuffer(sCode.length());
        while (scanner.find()) {
            String token = scanner.group();
            char first = token.charAt(0);
            String replacement;
            if (first == '"' || first == '\'') {
                replacement = MARK + literals.size() + MARK;
                literals.put(replacement, token);
            } else {
                replacement = " ";
            }
            // Neither replacement contains '$' or '\', so no quoting is needed.
            scanner.appendReplacement(stripped, replacement);
        }
        scanner.appendTail(stripped);

        // Pass 2: every whitespace run (newlines included) becomes one space,
        // so tokens on adjacent lines are never fused together.
        String collapsed = WHITESPACE.matcher(stripped).replaceAll(" ");

        // Pass 3: drop the spaces that are not needed as token separators.
        String compact = stripRedundantSpaces(collapsed);

        // Pass 4: put the original string literals back.
        return restoreLiterals(compact, literals);
    }

    /**
     * Removes leading/trailing spaces and any space adjacent to an operator
     * character. Expects whitespace to have been collapsed to single spaces.
     * A space separating two plus signs or two minus signs is kept, because
     * removing it would turn the pair into an increment/decrement operator.
     */
    private static String stripRedundantSpaces(String code) {
        StringBuilder out = new StringBuilder(code.length());
        int last = code.length() - 1;
        for (int i = 0; i <= last; i++) {
            char c = code.charAt(i);
            if (c != ' ') {
                out.append(c);
                continue;
            }
            if (i == 0 || i == last) {
                continue; // leading or trailing space
            }
            char prev = code.charAt(i - 1);
            char next = code.charAt(i + 1);
            boolean besideOperator = isOperator(prev) || isOperator(next);
            boolean wouldMerge = prev == next && (prev == '+' || prev == '-');
            if (!besideOperator || wouldMerge) {
                out.append(c);
            }
        }
        return out.toString();
    }

    /** Tells whether a space next to {@code c} can be removed. */
    private static boolean isOperator(char c) {
        return OPERATORS.indexOf(c) >= 0;
    }

    /** Replaces every placeholder in {@code code} with its original literal. */
    private static String restoreLiterals(String code, HashMap<String, String> literals) {
        Matcher m = PLACEHOLDER.matcher(code);
        StringBuilder result = new StringBuilder(code.length());
        int lastEnd = 0;
        while (m.find()) {
            result.append(code, lastEnd, m.start());
            String literal = literals.get(m.group());
            // Text that merely looks like a placeholder is passed through.
            result.append(literal != null ? literal : m.group());
            lastEnd = m.end();
        }
        result.append(code, lastEnd, code.length());
        return result.toString();
    }
}