/************************************************************************************************************************
 *
 * PhotoGrok Script
 *
 * Runs in multiple passes to detect duplicate files using MD5. Each tree build pass must point at a different folder(s).
 *
 * The second (or any subsequent) passes can take a long time if there are a lot of identical files. The script compares
 * files between the first pass and subsequent passes. If the name and size match, an MD5 is computed to determine if
 * the files are identical. A byte by byte comparison might be faster.
 *
 * The script fails in later versions of Java if using the Web Start version (apparently a security violation accessing
 * file.length() from JavaScript). Use the standalone Mac or PC versions instead.
 *
 * This script must be used in one of the following ways. With a filter as follows (always make it the last filter tab):
 *
 *    Directory,FileName | Javascript Expression | match(Directory,FileName)
 *
 * The filter version will return only files that have a match from the first tree build. You can also create a custom
 * tag with, for example, two nodes: "found" and "notfound" (use the filter above as a starting point). Then build your
 * tree using that custom tag. This will take twice as long as the filter only version but could be optimized.
 *
 *************************************************************************************************************************/

// get HashMap to store data between each pass
var map = getMap();
var matches = 0;   // number of files in this pass that matched a first-pass file
var recorded = 0;  // number of files recorded during the first pass
var md5 = null;    // MessageDigest instance, created on first use by getMD5()
var hexArray = new java.lang.String("0123456789ABCDEF").toCharArray();
var init = !map.containsKey('names'); // first pass...or at least first pass since map was cleared

if(init){
    // 'names': lower-cased file name -> array of file sizes
    // 'files': lower-cased file name -> array of java.io.File, index-aligned with the sizes array
    map.put('names', new java.util.HashMap());
    map.put('files', new java.util.HashMap());
}

/**
 * Filter entry point, called once per file.
 *
 * First pass (init is true): records the file's size and java.io.File under its lower-cased name and
 * returns true.
 *
 * Later passes: returns true when some recorded file has the same lower-cased name, the same size and
 * the same MD5 as this file; otherwise returns false.
 *
 * @param directory directory containing the file
 * @param filename  name of the file inside that directory
 * @return true on the first pass, otherwise whether an identical recorded file was found
 */
function match(directory, filename){
    var file = new java.io.File(directory, filename);
    var namesMap = map.get('names');
    var filesMap = map.get('files');
    var lcfn = filename.toLowerCase();
    var size = file.length();

    if(init){
        if(namesMap.containsKey(lcfn)){
            namesMap.get(lcfn).push(size);
            filesMap.get(lcfn).push(file);
        }else{
            namesMap.put(lcfn, [size]);
            filesMap.put(lcfn, [file]);
        }
        recorded++;
        return true;
    }

    var found = false;
    if(namesMap.containsKey(lcfn)){
        var sizearray = namesMap.get(lcfn);
        var filearray = filesMap.get(lcfn);
        var fileMD5 = null;

        // Check every recorded file with this name and size, not just the first one: several recorded
        // files may share a name and size while having different contents.
        for(var i = 0; i < sizearray.length && !found; i++){
            if(sizearray[i] !== size){
                continue;
            }
            if(fileMD5 === null){
                // hash the candidate lazily and only once, however many recorded files share its size
                fileMD5 = getMD5(file);
            }
            found = fileMD5.equals(getMD5(filearray[i]));
        }
    }

    if(found){
        matches++;
    }
    return found;
}

/**
 * Computes the MD5 of a file's contents.
 *
 * The file is streamed through the digest in fixed-size chunks, so the whole file is never held in
 * memory and files larger than 2 GB are handled. The stream is closed even if reading fails.
 *
 * @param file java.io.File to hash
 * @return upper-case hex MD5 as a java.lang.String
 */
function getMD5(file){
    if(md5 == null){
        md5 = java.security.MessageDigest.getInstance("MD5");
    }
    // discard any partial state left behind if a previous call failed part-way through
    md5.reset();

    var buffer = java.lang.reflect.Array.newInstance(java.lang.Byte.TYPE, 65536);
    var fis = new java.io.FileInputStream(file);
    try{
        var count;
        // read() may return fewer bytes than requested; keep going until end of stream (-1)
        while((count = fis.read(buffer)) != -1){
            md5.update(buffer, 0, count);
        }
    }finally{
        fis.close();
    }
    return bytesToHex(md5.digest());
}

/**
 * Converts a Java byte array to its upper-case hexadecimal representation.
 *
 * @param b Java byte[] to convert
 * @return java.lang.String containing two hex characters per input byte
 */
function bytesToHex(b){
    var hexChars = java.lang.reflect.Array.newInstance(java.lang.Character.TYPE, b.length * 2);
    for(var j = 0; j < b.length; j++){
        var v = b[j] & 0xFF; // mask to 0..255 because Java bytes are signed
        hexChars[j * 2] = hexArray[v >>> 4];
        hexChars[j * 2 + 1] = hexArray[v & 0x0F];
    }
    return new java.lang.String(hexChars);
}

// called when tree build is complete
function done(){
    if(init){
        showMessage('Recorded ' + recorded + ' files');
    }else{
        showMessage('Found ' + matches + ' matches');
    }
}