import java.io.*; public class Biased { public static class indexPair { public final int fst, snd; public indexPair(int fst, int snd) { this.fst = fst; this.snd = snd; }//constructor public String toString() { return( "(" + fst + "," + snd + ")" ); } }//indexPair class // ---------------------------------------------------------------------------- // Find the longest bi@sed.interval in `seq[]', // i.e. that is at least `fraction' made up of chars in `select', // and that is as long as possible. public static indexPair // longestBiased(...) longestBiased( String select, double fraction, char[] seq ) { double[] fstMinVal = new double[seq.length+1]; // new minimum values and int[] fstMinPos = new int [seq.length+1]; // their positions. double cummSum = 0, lastCummSum = 0; // cummulative sum & previous value int begins = 0, ends = -1, // best interval so far tracker = 0, // tracks fstMin...[] numMin = 1; // effective size of fstMin...[] fstMinVal[0] = 0; fstMinPos[0] = -1; // seq[0..-1] is the empty interval fraction = fraction / ( 1 + 0.5/seq.length ); // + a little "slack" if( fraction > 1 ) fraction = 1; // sanity! else if( fraction < 0 ) fraction = 0; double good = 1-fraction; double bad = -fraction; // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- // CONDITIONS OF USE: Provided that you | // (a) cite: | // L.Allison, | // Longest Biased Interval and Longest Non-Negative Sum Interval, | // Bioinformatics, V19, #10, pp1294-1295, July 2003, | // if this code is used in, or contributes | // towards, published work, and | // (b) maintain this notice, then | // (c) the code is available to you under the | // GNU General Public Licence (GPL) | // ( see http://www.gnu.org/copyleft/gpl.html ), | // otherwise the code is not available for use by you. | // -- L.A. 30/1/2003. | // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- int maxSelected = 0; // make boolean array for O(1)-time test of chars for( int i = 0; i < select.length(); i++ ) if( (int) select.charAt(i) > maxSelected ) maxSelected = (int) select.charAt(i); boolean[] selected = new boolean[maxSelected+1]; for( int i = 0; i <= maxSelected; i++ ) selected[i] = select.indexOf((char) i) >= 0; for( int i = 0; i < seq.length; i++ ) { // INV: seq[begins..ends] is longest bi@sed.interval of seq[0..i-1], // fstMinVal[] is strictly decreasing and with // fstMinPos[] shows the 1st occurences of minima. int seqI = (int) seq[i]; cummSum += (seqI <= maxSelected && selected[seqI]) ? good : bad; if( cummSum < fstMinVal[numMin-1] ) // a new, lower minimum { fstMinVal[numMin] = cummSum; fstMinPos[numMin] = i; numMin++; } // NB. fstMinVal[] is in *strictly* decreasing order. if( cummSum >= lastCummSum ) // it's increasing, getting better while( tracker > 0 && fstMinVal[tracker-1] <= cummSum ) tracker-- ; // tracker == 0 or fstMinVal[tracker-1] > cummSum else // cummSum < lastCummSum it's decreasing, getting worse while( tracker < numMin-1 && fstMinVal[tracker] > cummSum ) tracker++ ; // tracker == numMin-1 or fstMinVal[tracker] <= cummSum // (tracker == 0 or fstMinVal[tracker-1] > cummSum) and // (tracker == numMin-1 or fstMinVal[tracker] <= cummSum) // In fact, tracker is least index s.t. fstMinVal[tracker] <= cummSum, // so seq[fstMinPos[tracker]+1 .. i] is a candidate interval. if( i-fstMinPos[tracker] > ends-begins+1 ) // ? new longest ? { begins = fstMinPos[tracker]+1; ends = i; } lastCummSum = cummSum; }//for i // seq[begins..ends] is longest bi@sed.interval of seq[0..seq.length-1] return( new indexPair(begins, ends) ); }//longestBiased // ---------------------------------------------------------------------------- public static char[] readSeq(String fileName, String dontIgnore) { StringBuffer sb = new StringBuffer(); try{ File inputFile = new File( fileName ); FileReader in = new FileReader(inputFile); int c; while((c = in.read()) != -1) { if( dontIgnore.indexOf(c) < 0 ) continue; char ch = (char) c; sb.append( ch ); } } catch(Exception e) { System.out.println( "failed: " + e.getMessage() ); System.exit(1); } String s = sb.toString(); char[] arr = new char[s.length()]; for( int i = 0; i < s.length(); i++ ) arr[i] = s.charAt(i); return arr; }//readSeq public static indexPair run( String select, double fraction, char[] seq ) // Do a scan of the String at the given fraction and print the findings. { int cutoff = 20; indexPair interval = longestBiased( select, fraction, seq ); int len = interval.snd - interval.fst + 1; System.out.print( " >=" + fraction + ": [" + interval.fst + ".." + interval.snd + "] (" + len + ") " ); if( len <= cutoff ) // short, print all characters { for(int i = interval.fst; i <= interval.snd; i++ ) System.out.print( seq[i] ); } else // long, print first few and last few characters { for(int i = interval.fst; i < interval.fst + cutoff/2; i++) System.out.print( seq[i] ); System.out.print( "..." ); for(int i = interval.snd - cutoff/2 + 1; i <= interval.snd; i++) System.out.print( seq[i] ); } System.out.println(); return interval; }//run public static void main(String[] argv) { System.out.println("#--- Biased.java, L.A., CSSE, Monash, .au ---"); for(int i=0; i < argv.length; i++) // command line params if any System.out.println("# argv[" + i + "] = " + argv[i]); String select = ""; double fraction = 0.5; String fileName = ""; try { select = argv[0]; fraction = new Double(argv[1]).doubleValue(); // read the min' bi@s fileName = argv[2]; } catch(Exception e) { System.out.println("usage: java prog_name \"chars\" fraction filename"); System.exit(1); } char[] arr = readSeq( fileName, "acgtuACGTUrnyRNY" ); // INPUT THE STRING // NB. you may want to change the set of characters that are not ignored. System.out.print("# " + arr.length + "bp"); System.out.print(" "); for(int i=0; i < (arr.length < 65 ? arr.length : 65); i++) System.out.print( arr[i] ); System.out.println( arr.length > 65 ? "..." : "" ); if( fraction >= 0 && fraction <= 1 ) // a "proper" fraction run( select, fraction, arr ); else // specified fraction > 1 or < 0; take this as a flag to try several // decreasing fractions until we get the whole string, and stop. for( int ifrac = 100; ifrac > 0; ifrac -= 5 ) { indexPair interval = run( select, ifrac/100.0, arr ); if( interval.snd - interval.fst + 1 >= arr.length ) break; } System.out.println("#--- end ---"); }//main }//Biased class // --------------------------------------------------------------------- // October 2002, January 2003, L.Allison, CSSE, Monash University, .au | // http://www.csse.monash.edu.au/~lloyd/tildeProgLang/Java2/Biased/ | // Note the ``conditions of use'' in the code above. | // ---------------------------------------------------------------------