#! /bin/sh # This is a shell archive. Remove anything before this line, then unpack # it by saving it into a file and typing "sh file". To overwrite existing # files, type "sh file -c". You can also feed this as standard input via # unshar, or by typing "sh 5State.h <<'END_OF_5State.h' X X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* X * "5State.h" declarations for 5 States R-theory. X */ X#define fwd 0 X#define rev 1 X X#define adaptive 0 X#define fixed 1 X X#define BIG -1e7 X Xtypedef struct { X float D, H, V, A, B ; /* probs that the machine is in these states */ X float DM, DC, DH, DV, DA, DB ; /* probs of the arcs */ X float HM, HC, HH, HV ; X float VM, VC, VH, VV ; X float AD, AA ; X float BD, BB ; X} FiveStatePBlock ; X Xtypedef struct { X float D, H, V, A, B ; /* cost of the three states */ X float DM, DC, DH, DV, DA, DB ; /*weighted frequency counters*/ X float HM, HC, HH, HV ; X float VM, VC, VH, VV ; X float AD, AA ; X float BD, BB ; X} FS2cell ; X Xtypedef struct { X float D, H, V, A, B ; X} FS2pcell ; X X X#define FREE 0 /* let all the parms adapt freely */ X#define FIXED 1 /* fix the probs of the long indels */ X#define FiveStateML(A,B,ML,MatrixPart,mode,outfile,P,Accurarcy) \ X FiveStateMesgLength(A,B,ML,MatrixPart,mode,outfile,P,Accurarcy,FREE) X#define FiveState3ML(A,B,ML,MatrixPart,mode,outfile,P,Accurarcy) \ X FiveStateMesgLength(A,B,ML,MatrixPart,mode,outfile,P,Accurarcy,FIXED) X END_OF_5State.h if test 2272 -ne `wc -c <5State.h`; then echo shar: \"5State.h\" unpacked with wrong size! fi # end of overwriting check fi if test -f IO.c -a "${1}" != "-c" ; then echo shar: Will not over-write existing file \"IO.c\" else echo shar: Extracting \"IO.c\" \(19044 characters\) sed "s/^X//" >IO.c <<'END_OF_IO.c' X X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* X * "IO.c" Strings IO and miscellaneious routines X * X * Note : X * index for a sequence is 1..length. element 0 contains NULLSymbol X * index for an alignment is 1..length X * X * Entry Points : X * X * int ReadSequence(stream, s, length, Table) X * FILE *stream ; X * symbol *seq[] ; X * int *length ; X * AlphabetTable *Table ; X * X * read in a sequence from STREAM. X * The routine will allocate the required space for the sequence X * whose address will be returned in S. X * The length of the sequence is returned in LENGTH X * Also do internal symbol conversion and error checking X * X * ReadString(f,A,Table) X * FILE *f ; X * string *A ; X * AlphabetTable *Table ; X * X * read in a string -- a line of identifier, a line of description X * and a line of sequence X * X * GenbankReadString(f,A,Table) X * FILE *f ; X * string *A ; X * AlphabetTable *Table ; X * X * read in a string in genbank(?) format -- a line that begin with the X * delimiter '>' followed by an identifier, a line of description, X * then the sequence, in possibly more than one line. X * X * TranslateToSymbol(c, sequeuce, Table) X * char c[] ; X * symbol *sequence[] ; X * AlphabetTable *Table ; X * X * translate a character array into a sequence according to X * alphabet table TABLE X * X * X * fDisplaySequence(stream, s, length, Table) X * DisplaySequence(s, length, Table) X * fPrintSequence(stream, s, length, Table) X * PrintSequence(s, length, Table) X * X * symbol *s ; X * int length ; X * AlphabetTable *Table ; X * X * all these routine output the sequence s. X * DisplaySequence will break the sequence into LINELENGTH characters X * per line. X * X * PrintString(s, Table) X * string *s ; X * AlphabetTable *Table ; X * X * print a string (identifier, description and sequence) X * X * PrintAlignment(Alignment, AlignLength, A, B, Table) X * align Alignment[] ; X * symbol A[], B[] ; X * int AlignLength ; X * AplhabetTable *Table ; X * X * Print3DAlignment(Alignment, AlignLength, A, B, C, Table) X * align3D Alignment[] ; X * symbol A[], B[], C[] ; X * int AlignLength ; X * AplhabetTable *Table ; X * X * RandomString(s, Table) X * string *s ; X * AlphabetTable *Table ; X * X * allocate space for and generates a random sequence of X * length s->length X * X * RandomMutation(a, b, Mutations, PChanges, PInsertions, PDeletions, Table) X * string *a, *b ; X * int Mutations, PChanges, PInsertions, PDeletions ; X * AlphabetTable *Table ; X * X * do a Mutations number of random mutation on the sequence a. X * the resultant sequence is created and its pointer returned in b. X * PChanges, PInsertions and PDeletions are the ralative X * frequencies for Changes, Insertions and Deletions respectively X * X * StrConstantLengthMutation(a, b, Mutations, PChanges, PInsDel, Table) X * same a RandomMutation(), execpt that the length of the mutated X * sequence is kept the same as the original sequence. X * PInsDel is now the relative frequency or getting a Insetion X * or a deletion. X * X * SeqConstantLengthMutation(a, b, len, Mutations, PChanges, PInsDel, Table) X * symbol a[], b[] ; X * same as StrConstantLengthMutation() except at the sequence level. X * Note : the space for b is NOT allocated by the routine. X * X * ReverseSequence(s, length) X * symbol s[] ; X * int length ; X * reverse a sequence in situ X * X * ReverseAlignment(a, length) X * symbol a[] ; X * int length ; X * reverse an alignment in situ X */ X#include "macro.h" X#include X#include X X#include "Strings.h" X#include "error.h" X XAlphabetTable DNASymbol = { 4, {NULLSymbol, 'A', 'C', 'G', 'T'} } ; XComplementTable DNAComplement = { 4, {0, 4, 3, 2, 1} } ; /* {T,G,C,A} */ X XAlphabetTable AlphaSymbol = { 53, X { NULLSymbol, ' ', X 'A','B','C','D','E','F','G','H','I','J','K','L','M', X 'N','O','P','Q','R','S','T','U','V','W','X','Y','Z', X 'a','b','c','d','e','f','g','h','i','j','k','l','m', X 'n','o','p','q','r','s','t','u','v','w','x','y','z' X } X } ; X XAlphabetTable *SymbTable = &DNASymbol ; X Xchar *malloc() ; Xlong time() ; X X/* X * This cost function minimises the number of changes X */ Xfloat Cost11(a, b) X symbol a, b ; X{ X if (a == b) return 0.0 ; X else return 1.0 ; X} X X/* X * maximise the number of matches --- LCS X */ Xfloat Cost21(a, b) X symbol a, b ; X{ X if (a == b) return 0.0 ; X else if ((a == NULLSymbol) || (b == NULLSymbol)) return 1.0 ; X else return 2.0 ; X} X Xprintline(line,length) X char line[] ; X int length ; X{ X int i ; X X for (i = 0; i < length ; i++) putchar(line[i]) ; X putchar('\n') ; X} X Xprint3lines(line1, line2, line3, length) X char line1[], line2[], line3[] ; X int length ; X{ X printline(line1, length) ; X printline(line2, length) ; X printline(line3, length) ; X putchar('\n') ; X} X Xreadln(f) X FILE *f ; X{ X while (getc(f) != '\n') ; X} X X X/* X * read a line(delimeted by '\n' or EOF) from stream f into array s X * also check that current line is not too long for s X * return FALSE if EOF is reached X */ Xbool getline(f, s, maxlen) X char s[] ; X FILE *f ; X int maxlen ; X{ X int len ; X X len = 0 ; X while (((s[len] = (char) getc(f)) != '\n') && ((int) s[len] != EOF)) { X if (++len >= maxlen) { X (*ErrorHandling[ERRLineTooLong])(ERRLineTooLong) ; X return FALSE ; X } X } X X if ( (int) s[len] == EOF ) return FALSE ; X X s[len] = NULL ; X return TRUE ; X} X Xbool ToSymbol(c, symb, Table) X char c ; X symbol *symb ; X AlphabetTable *Table ; X{ X int i ; X X /* can change to binary search later on */ X for ( i = 1 ; i <= Table->size ; i ++) X if (Table->Symbol[i] == c) {*symb = i ; return TRUE ;} X X return FALSE ; X} X XReadSequence(f, seq, length, Table) X FILE *f ; X symbol *seq[] ; X int *length ; X AlphabetTable *Table ; X{ X int i, len, s[MAXSEQLENGTH] ; X int badchars[256] ; /* frequencies of occurrances of illegal symbols */ X bool foundbadchars ; X char c, c1 ; X X len = 0 ; X foundbadchars = FALSE ; X for (i=0 ; i<=255; i++) badchars[i] = 0 ; X X while ( (c = getc(f)) != '\n') { X if (islower(c)) c1 = toupper(c) ; else c1 = c ; X if (!ToSymbol(c1, &s[++len], Table)) { X /* found a illegal character */ X len-- ; /* skip it */ X if (c != ' ') { X badchars[c] ++ ; X foundbadchars = TRUE ; X } X } X if (len >= MAXSEQLENGTH) { X (*ErrorHandling[ERRLineTooLong])(ERRLineTooLong) ; X return ; X } X } X *seq = (symbol *) malloc((unsigned) (len+1) * sizeof(symbol)) ; X (*seq)[0] = NULLSymbol ; X X for ( i = 1; i <= len ; i++) (*seq)[i] = s[i] ; X X *length = len ; X X if (foundbadchars) { X printf("Warning : illegal characters ignored\n") ; X X for (i = 0; i <= 255 ; i++) X if (badchars[i]) X printf("char : %c frequency: %d\n",i,badchars[i]) ; X } X} /* ReadSequence */ X X XReadString(f,A,Table) X FILE *f ; X string *A ; X AlphabetTable *Table ; X{ X getline(f, A->identifier,IDLength) ; X getline(f, A->description,DESCLength) ; X ReadSequence(f,&A->sequence, &A->length, Table) ; X} X XGenbankReadSequence(f, seq, length, Table) X FILE *f ; X symbol *seq[] ; X int *length ; X AlphabetTable *Table ; X{ X int i, len, s[MAXSEQLENGTH] ; X char c, c1 ; X int badchars[256] ; /* frequencies of occurrances of illegal symbols */ X bool foundbadchars ; X X len = 0 ; X foundbadchars = FALSE ; X for (i=0 ; i<=255; i++) badchars[i] = 0 ; X X while ( ((c = getc(f)) != GENBANKDELIMITER) && (c != EOF)) { X if (c == '\n' || c == ' ') continue ; X if (islower(c)) c1 = toupper(c) ; else c1 = c ; X if (!ToSymbol(c1, &s[++len], Table)) { /* found a illegal character */ X len-- ; /* skip it */ X if (c != ' ') { X badchars[c] ++ ; X foundbadchars = TRUE ; X } X } X X if (len >= MAXSEQLENGTH) { X (*ErrorHandling[ERRLineTooLong])(ERRLineTooLong) ; X return ; X } X } X X *seq = (symbol *) malloc((unsigned) (len+1) * sizeof(symbol)) ; X (*seq)[0] = NULLSymbol ; X X for ( i = 1; i <= len ; i++) (*seq)[i] = s[i] ; X X *length = len ; X X if (foundbadchars) { X printf("Warning : illegal characters ignored\n") ; X X for (i = 0; i <= 255 ; i++) X if (badchars[i]) X printf("char : %c frequency: %d\n",i,badchars[i]) ; X } X} /* GenbankReadSequence */ X X XGenbankReadString(f,A,Table) X FILE *f ; X string *A ; X AlphabetTable *Table ; X{ X int c ; X X if ((c = getc(f)) != GENBANKDELIMITER) ungetc(c,f) ; X getline(f, A->identifier,IDLength) ; X getline(f, A->description,DESCLength) ; X GenbankReadSequence(f,&A->sequence, &A->length, Table) ; X} X X X/* X * translate a character array into a sequence according to X * alphabet table TABLE X */ XTranslateToSymbol(c, seq, Table) X char c[] ; X symbol *seq[] ; X AlphabetTable *Table ; X{ X int i, len ; X symbol s[MAXSEQLENGTH] ; X X len = 0 ; X X while ( c[++len] ) ToSymbol(c[len], &s[len], Table) ; X X *seq = (symbol *) malloc((unsigned) (len) * sizeof(symbol)) ; X X (*seq)[0] = 0 ; X for ( i = 1; i <= len ; i++) X (*seq)[i] = s[i] ; X} X X/***************************************************************************/ X XfPrintSequence(stream, s, length, Table) X FILE *stream ; X symbol s[] ; X int length ; X AlphabetTable *Table ; X{ X int i ; X X for (i = 1 ; i <= length; i++) putc(Table->Symbol[s[i]],stream) ; X putc('\n',stream) ; X} X XPrintSequence(s, length, Table) X symbol s[] ; X int length ; X AlphabetTable *Table ; X{ fPrintSequence(stdout, s, length, Table) ; } X X X XDisplaySequence(s, length, Table) X symbol s[] ; X int length ; X AlphabetTable *Table ; X{ fDisplaySequence(stdout, s, length, Table) ; } X XfDisplaySequence(stream, s, length, Table) X FILE *stream ; X symbol s[] ; X int length ; X AlphabetTable *Table ; X{ X int i, count; X X count = 0 ; X for (i = 1 ; i <= length; i++) { X if (++count > LINELENGTH) { X count = 0 ; X putc('\n',stream) ; X } X putc(Table->Symbol[s[i]],stream) ; X } X putc('\n',stream) ; X} X XPrintString(s, Table) X string *s ; X AlphabetTable *Table ; X{ X printf("%s %d bases\n", s->identifier,s->length) ; X printf("%s\n", s->description) ; X DisplaySequence(s->sequence, s->length, Table) ; X} X XPrintAlignment(Alignment, AlignLength, A, B, Table) X align Alignment[] ; X symbol A[], B[] ; X int AlignLength ; X AlphabetTable *Table ; X{ X int i, lo, hi ; X X hi = 0 ; X while (hi < AlignLength) { X lo = hi+1 ; X hi = min(AlignLength, hi + LINELENGTH) ; X for ( i = lo ; i <= hi; i++) X if (Alignment[i].a == 0) putchar('-'); X else putchar(Table->Symbol[A[Alignment[i].a]]) ; X putchar('\n') ; X for ( i = lo ; i <= hi; i++) X if ((Alignment[i].a != NULLSymbol) && X (Alignment[i].b != NULLSymbol) && X (A[Alignment[i].a] == B[Alignment[i].b])) putchar('|'); X else putchar(' ') ; X X putchar('\n') ; X for ( i = lo ; i <= hi; i++) X if (Alignment[i].b == 0) putchar('-'); X else putchar(Table->Symbol[B[Alignment[i].b]]) ; X putchar('\n') ; X putchar('\n') ; X } X} X XPrint3DAlignment(Alignment, AlignLength, A, B, C, Table) X align3D Alignment[] ; X symbol A[], B[], C[] ; X int AlignLength ; X AlphabetTable *Table ; X{ X char line1[LINELENGTH], line2[LINELENGTH], line3[LINELENGTH]; X int i, count ; X X count = 0 ; X for (i = 1 ; i <= AlignLength ; i++) { X if (Alignment[i].a == 0) line1[count] = '-' ; X else line1[count] = Table->Symbol[A[Alignment[i].a]] ; X X if (Alignment[i].b == 0) line2[count] = '-' ; X else line2[count] = Table->Symbol[B[Alignment[i].b]] ; X X if (Alignment[i].c == 0) line3[count] = '-' ; X else line3[count] = Table->Symbol[C[Alignment[i].c]] ; X X if (++count > LINELENGTH) { X count = 0 ; X print3lines(line1,line2,line3,LINELENGTH) ; X } X } X if (count != 0) X print3lines(line1,line2,line3,count) ; X} X XDuplicateSequence(a, alen, b, blen) X symbol a[], *b[] ; X int alen, *blen ; X{ X int i ; X X *b = (symbol *) malloc((unsigned) (alen+1) * sizeof(symbol)) ; X X for ( i = 0 ; i <= alen; i++) (*b)[i] = a[i] ; X X *blen = alen ; X} X Xsymbol randomsymbol(Table) X AlphabetTable *Table ; X{ X return 1 + randomnum(Table->size) ; X} X X/* X * generate a pair of random symbol that are different X */ Xrandomsymbolpair(Table,a,b) X AlphabetTable *Table ; X symbol *a, *b ; X{ X *a = 1+randomnum(Table->size) ; X *b = 1+randomnum(Table->size-1) ; X if (*b >= *a) X (*b) ++ ; X} X X/* X * generates a random sequence X */ XRandomSequence(s, length, Table) X symbol **s ; X int length ; X AlphabetTable *Table ; X{ X int i ; X X *s = (symbol *) malloc((unsigned) (length+1) * sizeof(symbol)) ; X (*s)[0] = NULLSymbol ; X for (i = 1; i <= length; i++) X (*s)[i] = randomsymbol(Table) ; X X} X XRandomString(s, Table) X string *s ; X AlphabetTable *Table ; X{ X RandomSequence(&s->sequence, s->length, Table) ; X} X X/* X * do a random mutation X */ XRandomMutation(a, b, Mutations, PChanges, PInsertion, PDeletion, Table) X string *a, *b ; X int Mutations, PChanges, PInsertion, PDeletion ; X AlphabetTable *Table ; X{ X symbol t[MAXSEQLENGTH] ; X int i, j, choice, pos ; X int P, PI, len ; X X P = PChanges + PInsertion + PDeletion ; X PI = PChanges + PInsertion ; X len = a->length ; X X for ( i = 1 ; i <= len; i++) X t[i] = a->sequence[i] ; X X for ( i = 1 ; i <= Mutations ; i++) { X choice = randomnum(P) ; X if (choice < PChanges) { /* changes */ X if (len > 0) { X pos = randomnum(len) + 1 ; /* 1..length */ X t[pos] = randomsymbol(Table) ; X debug(printf("change at %d %d\n",pos, t[pos]) ;) X } X } else if ( choice < PI) {/* insertion */ X pos = 1 + randomnum(len+1) ; /* 1..length+1 */ X for ( j = ++len; j > pos; j-- ) X t[j] = t[j-1] ; X t[pos] = randomsymbol(Table) ; X debug(printf("insertion at %d %d\n",pos, t[pos]) ;) X } else { /* deletion */ X if (len > 0) { X pos = randomnum(len) + 1 ; /* 1..length */ X debug(printf("deletion at %d %d\n",pos, t[pos]) ;) X for ( j = pos; j < len ; j++) X t[j] = t[j+1] ; X len-- ; X } X } X } X X DuplicateSequence(t, len, &b->sequence, &b->length) ; X} X XStrConstantLengthMutation(a, b, Mutations, PChanges, PInsDel, Table) X string *a, *b ; X int Mutations, PChanges, PInsDel ; X AlphabetTable *Table ; X{ X X b->sequence = (symbol *)malloc((unsigned) (a->length+1) * sizeof(symbol) ) ; X SeqConstantLengthMutation(&a->sequence[1], &b->sequence[1], a->length, Mutations, PChanges, PInsDel, Table) ; X} X XSeqConstantLengthMutation(a, b, len, Mutations, PChanges, PInsDel, Table) X symbol *a, *b ; X int Mutations, PChanges, PInsDel ; X AlphabetTable *Table ; X{ X int i, j, choice, pos ; X int P, PI ; X int nchanges, ninsdel ; X symbol t[MAXSEQLENGTH] ; X X P = PChanges + PInsDel ; X X for ( i = 0 ; i < len; i++) X t[i] = a[i] ; X X nchanges = ninsdel = 0 ; X X for ( i = 1 ; i <= Mutations ; i++) { X choice = randomnum(P) ; X if (choice < PChanges) /* changes */ X nchanges++ ; X else X ninsdel++ ; X } X X if (odd(ninsdel)) /* make it even */ X if (randomnum(2) && nchanges > 0) { /* toss a coin */ X /* inscrement it */ X ninsdel++ ; nchanges-- ; X } else { /* decrease it */ X ninsdel-- ; nchanges++ ; X } X debug(printf("changes: %d ins/del %d\n",nchanges, ninsdel) ;) X P = nchanges + ninsdel ; X PI = nchanges + (int)(ninsdel/2) ; X X for ( i = 1 ; i <= Mutations ; i++) { X choice = randomnum(P) ; X if (choice < nchanges) { /* changes */ X if (len > 0) { X pos = randomnum(len) ; X t[pos] = randomsymbol(Table) ; X debug(printf("change at %d %d\n",pos, t[pos]) ;) X nchanges -- ; P-- ; PI-- ; X } X } else if ( choice < PI) {/* insertion */ X pos = randomnum(len) ; X for ( j = ++len; j > pos; j-- ) X t[j] = t[j-1] ; X t[pos] = randomsymbol(Table) ; X debug(printf("insertion at %d %d\n",pos, t[pos]) ;) X PI-- ; P-- ; X } else { /* deletion */ X if (len > 0) { X pos = randomnum(len) + 1 ; X debug(printf("deletion at %d %d\n",pos, t[pos]) ;) X for ( j = pos; j < len ; j++) X t[j] = t[j+1] ; X len-- ; P-- ; X } X } X } X for ( i = 0 ; i < len; i++) b[i] = t[i] ; X} X XReverseSequence(s, length) X symbol s[] ; X int length ; X{ X int i, mid ; X symbol temp ; X X mid = length >> 1 ; X for ( i = 1 ; i <= mid ; i++) X swap(s[i], s[length-i+1]) ; X} X XReverseAlignment(alignment, length) X align alignment[] ; X int length ; X{ X int i, mid ; X align temp ; X X mid = length >> 1 ; X for ( i = 1 ; i <= mid ; i++) X swap(alignment[i], alignment[length-i+1]) ; X} X XCheckAlignment(alignment1,length1,alignment2,length2) X align alignment1[], alignment2[] ; X int length1, length2 ; X{ X int i ; X X if (length1 != length2) return FALSE ; X for (i = 1 ; i<=length1; i++) X if ((alignment1[i].a != alignment2[i].a) X || (alignment1[i].b != alignment2[i].b )) X return FALSE ; X return TRUE ; X} X Xstatic float findmin(D,m,n) X float D[][MATRIXSIZE]; X int m,n ; X{ X int i, j ; X float minval ; X X minval = MAXINT ; X for (i=0; i <= m ; i++) X for (j=0; j <= n ; j++) X if (D[i][j] < minval) minval = D[i][j] ; X return minval ; X} X X/* X * scale the matrix down by the minimum entry X */ Xscale(D,m,n) X float D[][MATRIXSIZE]; X int m, n ; X{ X float minval ; X int i, j ; X X minval = findmin(D,m,n) ; X for (i=0; i <= m ; i++) X for (j=0; j <= n ; j++) X D[i][j] -= minval ; X} X X/* X * Plot density matrix on screen X */ XPlotMatrix(a,b,D,m,n,Table) X float D[][MATRIXSIZE] ; X symbol a[], b[] ; X int m, n ; X AlphabetTable *Table; X{ X int i, j ; X float ml ; X char ch ; X X printf("\n ") ; X for (j=1 ; j<= n ; j++) { X /*putchar(' ');*/ X putchar(Table->Symbol[b[j]]); X } X putchar('\n') ;; X for (i=0 ; i<= m ; i++) { X if (i > 0 ) printf("%c:",Table->Symbol[a[i]]) ; X else printf(" "); X for (j=0 ; j <= n; j++) { X ml = D[i][j] ; X if (ml > 16) ch=' ' ; X else if (ml>8) ch='.' ; X else if (ml>4) ch='-' ; X else if (ml>2) ch='+' ; X else if (ml>1) ch='*' ; X else ch='#' ; X putchar(ch) ; X /*putchar(' ') ;*/ X } X putchar('\n') ; X } X putchar('\n') ; X printf("KEY: # * + - . \n") ; X printf(" : 1 2 4 8 16 bits over mml\n") ; X putchar('\n') ; X} X XOutputMatrix(A,B,D,ofile,Caption,Table) X string *A, *B ; X float D[][MATRIXSIZE] ; X char *Caption ; X FILE *ofile ; X AlphabetTable *Table ; X{ X int i, j ; X X fprintf(ofile,"%d %d\n",A->length, B->length) ; X fDisplaySequence(ofile, A->sequence, A->length, Table) ; X fDisplaySequence(ofile, B->sequence, B->length, Table) ; X for ( i = 0 ; i <= A->length ; i++) { X for ( j = 0 ; j <= B->length ; j++) X fprintf(ofile,"%5.1f ", D[i][j]) ; X putc('\n',ofile) ; X } X fprintf(ofile,"%s\n",Caption) ; X fprintf(ofile,"%s vs %s\n",A->identifier,B->identifier) ; X} /*plot*/ X END_OF_IO.c if test 19044 -ne `wc -c README <<'END_OF_README' XMinimum-Message Length Alignment Based on 1, 3 and 5-State Machines. X-------------------------------------------------------------------- X XInstallataion : execute the file "install" X Xdocumentation : file "doc", in troff format X Xexecutables : rtheory X Xutilities : scale, pimg (see documentation) X END_OF_README if test 297 -ne `wc -c Strings.h <<'END_OF_Strings.h' X X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X#define IDLength 40 X#define DESCLength 180 X X#define MAXSEQLENGTH 4000 X#define MAXALIGNLENGTH 8000 /* 2 * MAXSEQLENGTH */ X#define MATRIXSIZE 650 X X#define GENBANKDELIMITER '>' /* Genbank record delimiter */ X X#define MaxSymbol 257 X X#define NULLSymbol 0 X Xtypedef int symbol ; X Xtypedef struct { X int size ; /* need this for binary search */ X char Symbol[MaxSymbol] ;/* must be initialized in ascending order*/ X } AlphabetTable ; X Xtypedef struct { X int size ; X int complement[MaxSymbol] ; X } ComplementTable ; X X Xtypedef struct { X char identifier[IDLength] ; X char description[DESCLength] ; X int length ; X symbol *sequence ; /* malloc the spaces required */ X } string ; X Xtypedef struct { /* alignment of 2 strings */ X int a, b ; X } align ; X Xtypedef struct { /* alignment of 3 strings */ X int a, b, c ; X } align3D ; X Xtypedef struct { /* for block alignment */ X int d, v, h ; X } Balign ; X Xextern AlphabetTable DNASymbol, AlphaSymbol ; Xextern ComplementTable DNAComplement ; X Xextern AlphabetTable *SymbTable ; X Xfloat Cost11(), Cost21() ; /* Cost functions */ X Xextern double logfactorial[MAXSEQLENGTH] ; END_OF_Strings.h if test 2308 -ne `wc -c doc <<'END_OF_doc' X.nh X.ce 8 X\fBMinimum-Message Length Alignment XBased on 1, 3 and 5-State Machines.\fP X XC. N. Yee, L. Allison, XDepartment of Computer Science, XMonash University, XAustralia 3168. X Xxxx@bruce.cs.monash.edu.au xxx=[cyee, lloyd] XApril 1991 X X.he ''MML Alignment'-%-' X.fo ''Computer Science, Monash University'' X X.sh 1 "Introduction." X.pp XThis document consists of program documentation (only) Xfor Minimum-Message Length (MML) alignment programs Xfor DNA. XThe algorithms are based on one, three and five-state models of mutation Xor relation of DNA. XThe document describes `how' not `why'. XFor a gentle (?) introduction to `why' see TR90/148. X.pp XA simple driver routine is provided to allow the algorithms to be used. XThis is \fInot\fP intended to provide a practical sequence-analysis environment. XThe intention is that the alignment algorithms Xwill be used for research into the alignment problem and Xmay be extracted and built into sequence-analysis packages by others. X.pp XIf you do make use of these algorithms, please include these references Xin any subsequent publication: X X.ne 2 XL.Allison, C. S. Wallace & C. N. Yee. XInductive inference over macro-molecules. X.br XTR 90/148 Dept. Computer Science, Monash University, Australia 3168. X X.ne 2 XL. Allison & C. N. Yee. XMinimum message length encoding and the comparison of macro-molecules. X.br XBull. Math. Biol. 52(3) 431-453 1990. X X.ne 2 XL.Allison, C. S. Wallace & C. N. Yee. XFinite-State Models in the Alignment of Macromolecules XJ. Mol. Evol. (1992) 35:77-89 X X X.sh 1 "The One-State Machine." X.pp XThe one-state machine has a single state and the transitions or Xarcs are named as follows: X M(atch) same character in both strings, X C(hange) differing characters in the 2 strings, X V(insert in A) and X H(insert in B) . X.br X(V from Vertical, H from Horizontal.) XThe program forces P(I) = P(D) as this is a reasonable prior assumption. X(It would be easy to let P(I) and P(D) differ; Xthere would be a cost in stating an extra parameter Xbut the data strings might be fitted better.) X X.sh 1 "The Three State Machine." X.pp XThe three-state machine corresponds to linear indel costs. XThere is a low probability of starting to insert (delete) Xand a higher probability of continuing a run of inserts (deletes). X.(b X.ft CW X ---- ---- X | | | | X DM | V V |DC X | ----------- | X ---| |-- X /---->| D |<----. X / | | . X / /->-----------<-. . X / / / . . . X HM/ HC/ / . .VC .VM X / / / . . . X | / /DH DV. . . X | | V V | | X | -------- HV -------- | X ----| |--------->| |----- X | H | | V | X --->| |<---------| |<--- X HH| -------- VH -------- |VV X |----- ------| X.ft X X \fBFigure 1 : The Three State Machine.\fP X.)b X.pp XThe three states are called D (diagonal move, for matches and changes), XH (horizontal move, or insert in B) and V (vertical move, or insert in A). XThere are four outgoing arcs in state D: X.(l XDM - a match, which leaves the machine in state D XDC - a change, which leaves the machine in state D XDH - an insert, which leaves the machine in state H, and XDV - a delete, which leaves the machine in state V. X.)l X.pp XSimilarly the four outgoing arcs in state H are named: HM, HC, HH, and HV; Xand four for state V: VM, VC, VH, VV. X.pp XWe make states H and V equivalent X(i.e. DH = DV, VM = HM, VC = HC, VH = HV, VV = HH) Xas there is no evidence as yet to indicate that they should differ. XThis reduces the degrees of freedom and hence the number of parameters Xthat need to be stated. X X.sh 1 "The Five State Machine." X.pp XThe five-state machine corresponds to piece-wise linear indel costs. X.(b X.ft CW X ------- ------- X ----| |-----. /-----| |---- XAA| | A | .AD BD/ | B | |BB X --->| |<-. . / /->| |<--- X ------- . . DM DC / / ------- X . . --- --- / / X DA. | | | | | | /DB X . V | V V | V / X .-----------------/ X | | X /---->| D |<----. X / | | . X / /->-----------------<-. . X / / / . . . X HM/ HC/ / . .VC .VM X / / / . . . X | / /DH DV. . . X | | V V | | X | -------- HV -------- | X ----| |--------------->| |----- X | H | | V | X --->| |<---------------| |<--- X HH| -------- VH -------- |VV X |----- ------| X.ft X \fBFigure 2 : The Five State Machine (Bugs Bunny).\fP X.)b X.pp XThe five-state machine is an extension of the three-state machine Xwith two extra states \fB\A\fP and \fBB\fP Xfor long insert in string A and string B respectively. XTo reduce the number of arcs, and thus parameters, to estimate and state, Xwe restrict the transition to and Xfrom \fBA\fP and \fBB\fP through state \fBD\fP. XNote that the arcs \fBAD\fP and \fBBD\fP bring the machine back to state \fBD\fP Xbut they do not generate any character. X.pp XFor the reasons stated for the Three-State machine Xthe \fBH\fP and \fBV\fP; \fBA\fP and \fBB\fP states are made equivalent. XWe also fix the probabilities of long indels as they do not occur Xfrequently enough for them to be meaningfully infered from single pairs of Xstrings of typical lengths. XAt present these parameters are fixed at: X.br XPDA\ =\ PDB\ =\ 0.001; PAD\ =\ ABD\ =\ 0.01; PAA\ =\ PBB\ =\ 0.99. X.br XSee file "5State.c" to change them. X X.sh 1 "The Iteration Procedure to Find Optimal Parameters." X.pp XWe describe here an iterative procedure Xthat adjusts the parameter values to minimise the message length. XWe believe that from a `reasonable' set of starting parameters Xthe procedure finds the global minimum Xexcept in exceptional, artificial cases. XA similar technique is used for the three and five-state machines. X.pp XEach cell D[i,j] is associated with the \fBML\fP field Xplus four frequency counters \fBM,C,H,V\fP. XThe \fBML\fP stores the message length to transmit A[1..i] and B[1..j]. XThe four frequency counters store the number of times Xthe machine makes transitions through the various arcs to get to D[i,j]. XThe general dynamic programming step is as follows: X.pp XWe first compute the odds-ratios of entering D[i,j] from Xthe three incoming neighbours; XD[i-1,j](Phoriz), D[i-1,j-1](Pdiag) and D[i,j-1](Pvert). X.(l X.ft CW XPvert = exp2(-D[i,j-1].ML - CostIns) ; XPhoriz = exp2(-D[i-1,j].ML - CostDel) ; Xif (A[i] = B[j]) then Pdiag = exp2(-D[i-1,j-1].ML - CostMatch) ; X else Pdiag = exp2(-D[i-1,j-1].ML - CostChange) ; X.ft X.)l X.pp XArriving from the vertical direction involves a transition on the V arc Xso we increment D[i,j-1].V by 1. XSimilarly for the horizontal direction we increment the H field of D[i-1,j]. XFor D[i-1,j-1] on the diagonal direction Xwe either increment the M or the C field Xdependent on whether the A[i] and B[j] is a match or a change. XThe frequency counters of D[i,j] are the Xsums of their counterparts from D[i,j-1], D[i-1,j] and D[i-1.j-1], Xweighted by the odds-ratios of entrance from each of the directions. XFinally the message length D[i,j].ML = log2(Pvert + Phoriz + Pdiag). X(NB.\ A fast routine is used to avoid calling exp and log.) X.pp XThe boundary conditions are given by: X.(l X.ft CW XD[0,0].ML = 0 ; D[0,0].M,C,H,V = 0 ; XD[i,0].ML = CostDel*i; D[i,0].V = i; D[i,0].M,C,H = 0 ; XD[0,j].ML = CostIns*j; D[0,j].H = j; D[0,j].M,C,V = 0 ; X.ft X.)l X.pp XWe start the procedure with estimated costs (probabilities) for the arcs. XHowever at the end of a run Xwe can use the frequency counters of cell D[m,n] Xto compute what the parameter values should have been Xfor the alignment that we have computed. XChanging the parameters changes the optimality of alignments so we Xiterate with these new parameter values. XThe iteration converges quickly. X.pp XWe can also extend the idea Xby recomputing the arc costs at each step Xfrom the frequency counters. XThis amounts to adjusting the cost of the arcs Xby what have encountered so far in the input strings. XIn this case the starting parameters for cell D[0,0] Xis not crucial as the adaptive process stabilises very quickly. XA typical value is P(M) = P(C) = P(H) =P(V) = 0.25. XNote that this method is similar to computing Xthe message length of ONE alignment using \fIadaptive coding\fP. XWe only use this process as a heuristic to generate good starting parameters Xfor subsequent iterations because it is not "sound". X X.sh 1 "The R-theory Driver Program." X.pp XA simple driver program has been written to demonstrate the algorithms. XIt is \fInot\fP intended to provide a practical sequence-analysis environment. X X.sh 2 "An Example Output." X.pp XThe sample run below require a file called "examplestrings", Xtexts beginning with "#" are explanatory notes: X.(l X.ft CW X>example 1 # a short name must begin with '>' Xexample string 1 # a 1 line description XCAACCAAC TGCA # the strings itself over XTTAGCT # 1 or more lines X>example 2 # the second string, begin with '>' Xexample string 2 XACCAACCA TGCA XGGGG XTTAGCT X.ft X.)l X.pp XThe output from a complete run is given below. XTexts in \fIItalics\fP are user responses to Xprompts from the driver program. X X.nf X.ft CW X# X# The driver program X# XInput the file name where the strings are in Xor type to input strings from terminal : \fIexamplestrings\fP X X.ne 3 Xexample 1 18 bases Xexample string 1 XCAACCAACTGCATTAGCT X X.ne 3 Xexample 2 22 bases Xexample string 2 XACCAACCATGCAGGGGTTAGCT X X.ne 6 XDo you want to run: X 1. One-State machine X 2. Three-State machine X 3. Five-State machine X 4. One-State machine one (best) alignment X 5. Three-State machine one (best) alignment X 6. All machines X X Input option : \fI4\fP X XFile name to output density plot matrix for One-State machine? Xor type if density plot is not required : \fI1State.plot\fP XFile name to output density plot matrix for Three-State machine? Xor type if density plot is not required : \fI3State.plot\fP XFile name to output density plot matrix for Five-State machine? Xor type if density plot is not required : \fI5State.plot\fP X XInitial parameters -- do you want to : X 1. Do adaptive coding on 1st run, or X 2. Use default parameters, or X 3. Input your own X Input option : \fI1\fP X X# X# Output from the r-theory alignment routines X# XONE-STATE MACHINE : Xiteration 1: PM=0.426 PC=0.173 PID=0.401 ML= 92.864 ( 79.064) SumFq: 25.1 Xiteration 2: PM=0.496 PC=0.129 PID=0.374 ML= 91.160 ( 77.285) SumFq: 24.6 Xiteration 3: PM=0.531 PC=0.101 PID=0.368 ML= 90.396 ( 76.394) SumFq: 24.5 Xiteration 4: PM=0.548 PC=0.078 PID=0.373 ML= 90.102 ( 75.942) SumFq: 24.6 Xiteration 5: PM=0.558 PC=0.061 PID=0.381 ML= 89.919 ( 75.590) SumFq: 24.7 Xiteration 6: PM=0.565 PC=0.049 PID=0.386 ML= 89.771 ( 75.292) SumFq: 24.8 X X.(b Xiteration 7: X Fwd[i,j]+Rev[i+1,j+1]-MML X 1 avg algn= 102.674 ( - chars - len = 32.514 ) X null-theory= 94.15 = 10.87(sumlen) + 3.28(difflen) + 80.00(chars) bits X OneState= 89.66 = 4.88(pars) + 75.07(algn) + 9.71(len) Xalgn - chars= 14.62 Xprobability related= 0.9572 X.)b X X.(b X ACCAACCATGCAGGGGTTAGCT X ##*-. XC:*-*#-.. XA:++--#-.. XA:.*-.-#-.... XC:.-*---#-.... XC: .-*-.-#-.... XA: .-*--**+--... XA: .-*+-*++---.. XC: ..+*#--+---.. XT: ..--#--++--.. XG: ...-#---+--. XC: ...-#++++-.. XA: ...-#####-.. XT: ...-++++#-. XT: .....--+#-. XA: ......-#-. XG: ......-#-. XC: ....-#- XT: ...-# X XKEY: # * + - . X : 1 2 4 8 16 bits over mml X X1-State Machine: 2.2443(bits/symb) 89.77(ML) 75.29(Matrix) X.)b X X X.ne 10 XTHREE-STATE MACHINE : X Iteration 1 : X M C H V sumfq X D 0.5640 0.461 0.159 0.190 0.190 13.8 X H 0.4360 0.328 0.180 0.254 0.238 7.9 X V 0.4360 0.328 0.180 0.238 0.254 3.8 X ML= 95.8524 (matrix part= 79.0891) X Iteration 2 : X M C H V sumfq X D 0.5967 0.589 0.110 0.151 0.151 14.5 X H 0.4033 0.313 0.154 0.312 0.221 7.3 X V 0.4033 0.313 0.154 0.221 0.312 3.3 X ML= 92.9009 (matrix part= 76.0451) X Iteration 3 : X ... X ... X ... X Iteration 8 : X M C H V sumfq X D 0.6190 0.774 0.036 0.095 0.095 14.6 X H 0.3810 0.295 0.067 0.550 0.089 7.2 X V 0.3810 0.295 0.067 0.089 0.550 2.9 X ML= 87.5069 (matrix part= 69.0361) X X.(b X ACCAACCATGCAGGGGTTAGCT X #**-... XC:*--*.... XA:*---*.... XA:-*-..*..... XC:..*...*...... XC:...*..-*-....... XA:....*--+*--...... XA: ....*++*--....... XC: ...-**#-----.... XT: ...---#------... XG: ..-..-#------.. XC: ..-..-#++++-... XA: ...----#####-... XT: ..-------+#-.. XT: ......---#... XA: ..........#... XG: ........#.. XC: .......#. XT: .....# X XKEY: # * + - . X : 1 2 4 8 16 bits over mml X X3-State Machine: 2.1877(bits/symb) 87.51(ML) 69.04(Matrix) X.)b X X X.ne 10 XFIVE-STATE MACHINE : XIteration 1 : X D 0.6779 0.532(M) 0.182(C) 0.185(H) 0.100(V) 0.001(A) 0.001(B) 21.3(sumfq) X H 0.1837 0.379(M) 0.278(C) 0.281(H) 0.061(V) 6.2(sumfq) X V 0.0805 0.359(M) 0.333(C) 0.115(H) 0.193(V) 2.4(sumfq) X A 0.0204 0.010(D) 0.990(A) 0.2(sumfq) X B 0.0374 0.010(D) 0.990(B) 0.7(sumfq) X ML= 98.7571 (matrix part= 77.9097) XIteration 2 : X ... X ... X ... XIteration 11 : X D 0.6522 0.725(M) 0.064(C) 0.126(H) 0.082(V) 0.001(A) 0.001(B) 19.0(sumfq) X H 0.2147 0.274(M) 0.140(C) 0.527(H) 0.059(V) 6.5(sumfq) X V 0.0830 0.421(M) 0.160(C) 0.115(H) 0.305(V) 2.4(sumfq) X A 0.0263 0.010(D) 0.990(A) 0.3(sumfq) X B 0.0239 0.010(D) 0.990(B) 0.2(sumfq) X ML= 92.7831 (matrix part= 71.1250) Xminimum MesgLength is 92.78 X X.(b X ACCAACCATGCAGGGGTTAGCT X ###-... XC:*-+#-... XA:*+--#.... XA:.*-.-#..... XC:..*-.-#-.... XC:...*-.-#-...... XA: ...*--+*---.... XA: ...*++*----.... XC: ..-**#------... XT: ..---#------.. XG: .-..-#------.. XC: .-..-#++++-.. XA: ....-#####-.. XT: ....---++#-.. XT: ......---#.. XA: .......#.. XG: .......#.. XC: ......#. XT: .....# X XKEY: # * + - . X : 1 2 4 8 16 bits over mml X X5-State Machine: 2.3196(bits/symb) 92.78(ML) 71.13(Matrix) X.)b X X X.(b XSUMMARY X------- Xmessage length (bits/symbol) : XNull: 2.354 1-S: 2.244(1.882) 3-S: 2.188(1.726) 5-S: 2.320(1.778) X Xmessage length (bits) : XNull: 94 1-S: 90( 75) 3-S: 88( 69) 5-S: 93( 71) X.)b X.ft X.fi X X X.sh 1 "Displaying a Density Plot on a Graphics Device." X.pp XTwo utilities are provided to display the density plot Xgenerated by the r-theory machines Xfor display on a POSTSCRIPT graphic device. X.pp XThe program \fBscale\fP transforms the density matrix Xinto matrix of grey-scale of the range 0\ -\ 255. XIt also provides a few transformation options Xthat highlight the different features of the r-theory alignment. X.pp XThe \fBl\fP option displays the matrix in a logarithmic scale of message length. XThe optional `contrast' factor controls the sharpness of the Xhigh probability region. The default value is 1. A high value X(e.g. 4) gives a sharper picture while a low value (e.g. 0.2) Xincludes more ML's in the high intensity region. X.pp XThe \fBp\fP option displays the image in a linear scale in \fIprobability\fP. XThe \fBm\fP option displays the image in a linear scale in \fIML\fP. XIf the optional \fBthreshold\fP value Xis supplied then all cells in excess of this value will be ignored, Xand the remaining values scaled accordingly. X.pp XThe program \fBpimg\fP accepts the output from \fIscale\fP Xand generate a POSTSCRIPT program to display the matrix. X.pp XFor example, to display \fI1State.plot\fP generated Xby the sample run in last section in logarithmic scale: X.(l Xscale l < 1State.img | pimg | lpr -Plaser X.)l X X.sh 1 "Resource Utilizations" X.pp XThe generation of density plots requires o(n^2) sapce. XAt present the program can generate plots for strings Xof length 300, and this requires 10000 kbytes of stacksapce. XMake sure that you have allocated enough stackspace Xon your machine by the command: X.(l Xlimit stacksize 10000 X.)l X.pp XYou can adjust the constant \fBMATRIXSIZE\fP in "Strings.h" Xaccordingly depending on your requirements Xand the memory capacity of your machine. X.pp XWhen density plot is not required the machine can handle Xstrings of much greater length. It is controlled by the Xconstant \fBMAXSEQLENGTH\fP in "Strings.h", which is currently Xset to 4000. END_OF_doc if test 17836 -ne `wc -c driver.c <<'END_OF_driver.c' X X/* X * Copyright (c) L.Allison, C.S.Wallace, C.N.Yee. X * Dept. Computer Science, Monash University, Australia 3168, May 1991. X * xxx@bruce.cs.monash.edu.au where xxx = {lloyd, csw, cyee} X * 1. These routines may only be used for non-commercial, non-classified, X * research purposes. X * 2. No liability of any kind is accepted for their use. X * 3. As a courtesy, please inform us of any interesting uses of the routines, X * of any bugs that you find in them and of how to correct X * such bugs, if known. X * 4. Do not delete this copyright notice. X * 5. Please include the following references in any publication based wholly X * or in part on the use of these routines or on the ideas contained in them: X * . Allison L. & C.N.Yee. X * Minimum Message Length Encoding and the Comparison of Macro-Molecules. X * Bull. Math. Bio. 52(3) 431-453 1990 X * . Allison L., C.S.Wallace & C.N.Yee. Finite-State Models in the Alignment X * of Macromolecules. J. Mol. Evol. (1992) 35:77-89 X * 6. You may distribute the routines to others if and only if X * they agree to accept and maintain these conditions and no fee is charged. X * 7. There is no rule 7. X */ X/* X * "Driver.c" X * X * driver for public release of R-theory alignment program X */ X#include X#include "macro.h" X#include "Strings.h" X#include "rtheory.h" X X#define ONESTATE 1 X#define THREESTATE 2 X#define FIVESTATE 3 X#define ONESTATER0 4 X#define THREESTATER0 5 X#define ALL 6 X X#define Accurarcy 0.000005 X XFILE *fopen() ; XFILE *OSoutfile, *TSoutfile, *FSoutfile, *infile ; X Xchar filename[IDLength] ; X Xstring A, B ; XOneStatePBlock OSParameters ; XOneStatePBlock OSR0Parameters ; XThreeStatePBlock TSParameters ; XThreeStatePBlock TSR0Parameters ; XFiveStatePBlock FSParameters ; X Xchar * XMachineName(Machine) X int Machine ; X{ X switch (Machine) { X case ONESTATE : return "One-State" ; X case THREESTATE : return "Three-State" ; X case FIVESTATE : return "Five-State" ; X case ONESTATER0 : return "One-State best alignment" ; X case THREESTATER0: return "Three-State best alignment" ; X default : printf("ERROR : unknown machine") ; exit() ; X } X} X XPromptForStrings() X{ X int c ; X X printf("Input the file name where the strings are in\n") ; X printf("or type to input strings from terminal : ") ; X X if ((c = getchar()) == '\n') { X printf("input string 1:\n") ; X ReadSequence(stdin, &A.sequence, &A.length, &DNASymbol) ; X printf("input string 2:\n") ; X ReadSequence(stdin, &B.sequence, &B.length, &DNASymbol) ; X } else { X ungetc(c,stdin) ; X getline(stdin,filename,IDLength) ; X if (!(infile = fopen(filename,"r"))) { X printf("Error opening %s\n",filename) ; X exit(99) ; X } X printf("\n") ; X GenbankReadString(infile, &A, &DNASymbol) ; X PrintString(&A, &DNASymbol) ; X printf("\n") ; X GenbankReadString(infile, &B, &DNASymbol) ; X PrintString(&B, &DNASymbol) ; X } X} X X XPromptOneFile(Machine,outfile) X int Machine ; X FILE **outfile ; X{ X int c ; X X while (TRUE) { X printf("File name to output density plot matrix for %s machine?\n",MachineName(Machine)) ; X printf("or type if density plot is not required : ") ; X if ((c = getchar()) != '\n') { X ungetc(c,stdin) ; X getline(stdin,filename,IDLength) ; X if (*outfile=fopen(filename,"w")) return ; X printf("Error opening file: %s\n",filename) ; X } else { X *outfile = (FILE *) NULL ; X return ; X } X } X} X XPromptOutputFile(Machine) X int Machine ; X{ X if (A.length > MATRIXSIZE || B.length > MATRIXSIZE) X printf("Sequences are too long ... no density plot generated\n") ; X else { X putchar('\n') ; X switch(Machine) { X case ONESTATE : X PromptOneFile(ONESTATE,&OSoutfile) ; X break ; X case THREESTATE : X PromptOneFile(THREESTATE,&TSoutfile) ; X break ; X case FIVESTATE : X PromptOneFile(FIVESTATE,&FSoutfile) ; X break ; X case ALL : X PromptOneFile(ONESTATE,&OSoutfile) ; X PromptOneFile(THREESTATE,&TSoutfile) ; X PromptOneFile(FIVESTATE,&FSoutfile) ; X break ; X } X } X} X Xint PromptMachinesToRun() X{ X int c ; X X while (TRUE) { X printf("\nDo you want to run:\n") ; X printf(" 1. One-State machine\n") ; X printf(" 2. Three-State machine\n") ; X printf(" 3. Five-State machine\n") ; X printf(" 4. One-State machine one (best) alignment\n") ; X printf(" 5. Three-State machine one (best) alignment\n") ; X printf(" 6. All machines\n") ; X printf(" Input option : ") ; X c = getchar() ; readln(stdin) ; X X switch (c) { X case '1' : return ONESTATE ; X case '2' : return THREESTATE ; X case '3' : return FIVESTATE ; X case '4' : return ONESTATER0 ; X case '5' : return THREESTATER0 ; X case '6' : return ALL; X } X } X} X XInitOSParameters(P) X OneStatePBlock *P ; X{ X P->match = 0.8 ; X P->change = 0.1 ; X P->indel = 0.1 ; X} X XInitTSParameters(P) X ThreeStatePBlock *P ; X{ X P->DM = 0.75 ; X P->DC = 0.15 ; X P->DV = P->DH = 0.05 ; X X P->HM = P->VM = 0.37 ; X P->HC = P->VC = 0.10 ; X P->HH = P->VV = 0.50 ; X P->HV = P->VH = 0.03 ; X P->D = P->H = P->V = 1.0/3 ; X} X XInitFSParameters(P) X FiveStatePBlock *P ; X{ X P->DM = 0.748 ; X P->DC = 0.15 ; X P->DV = P->DH = 0.05 ; X P->DA = P->DB = 0.001 ; X X P->HM = P->VM = 0.37 ; X P->HC = P->VC = 0.10 ; X P->HH = P->VV = 0.50 ; X P->HV = P->VH = 0.03 ; X X P->AA = P->BB = 0.99 ; X P->AD = P->BD = 0.01 ; X X P->D = P->H = P->V = P->A = P->B = 0.2 ; X} X XInitParameters(Machine) X int Machine ; X{ X switch (Machine) { X case ONESTATE : X InitOSParameters(&OSParameters) ; X break ; X case ONESTATER0 : X InitOSParameters(&OSR0Parameters) ; X break ; X case THREESTATE : X InitTSParameters(&TSParameters) ; X break ; X case FIVESTATE : X InitFSParameters(&FSParameters) ; X break ; X case THREESTATER0 : X InitTSParameters(&TSR0Parameters) ; X break ; X case ALL : X InitOSParameters(&OSParameters) ; X InitTSParameters(&TSParameters) ; X InitFSParameters(&FSParameters) ; X InitTSParameters(&OSR0Parameters) ; X InitTSParameters(&TSR0Parameters) ; X break ; X } X} X XReadOSParameters(P) X OneStatePBlock *P ; X{ X printf("One-State Machine Parameters:\n") ; X printf(" Input P(Match), P(Change) : ") ; X scanf("%f %f",&P->match,&P->change) ; X P->indel = 1 - P->match - P->change ; X printf("Parameters read in:") ; X OS_printparameters(P) ; putchar('\n') ; X readln(stdin) ; X} X XReadTSParameters(P) X ThreeStatePBlock *P ; X{ X printf("Three-State Machine:\n") ; X printf("State D:\n") ; X printf(" input PDM, PDC :") ; X scanf("%f %f",&P->DM, &P->DC) ; readln(stdin) ; X P->DH = P->DV = (1 - P->DM - P->DC)/2 ; X X printf("State H and V (indels):\n") ; X printf(" input PHM, PHC, PHH :",&P->HM,&P->HC,&P->HH) ; X scanf("%f %f %f",&P->HM,&P->HC,&P->HH) ; readln(stdin) ; X P->VM = P->HM ; P->VC = P->HC ; P->VV = P->HH ; X P->VH = P->HV = 1 - P->HM - P->HC - P->HH ; X X P->D = P->H = P->V = 1/3.0 ; X printf("Parameters read in:\n") ; X TS_printparameters(P) ; putchar('\n') ; X} X XReadFSParameters(P) X FiveStatePBlock *P ; X{ X printf("Five-State Machine:\n") ; X printf("State D-\n") ; X printf(" input PDM, PDC, PDH :") ; X scanf("%f %f %f",&P->DM, &P->DC,&P->DH) ; readln(stdin) ; X P->DV = P->DH ; X P->DB = P->DA = (1 - P->DM - P->DC - P->DH - P->DV)/2 ; X X printf("State H and V(short indels):\n") ; X printf(" input PHM, PHC, PHH :",&P->HM,&P->HC,&P->HH) ; X scanf("%f %f %f",&P->HM,&P->HC,&P->HH) ; readln(stdin) ; X P->HV = P->VH = 1 - P->HM - P->HC - P->HH ; X P->VM = P->HM ; P->VC = P->HC ; P->VV = P->HH ; X printf("State A and B :\n") ; X printf(" input PAA :") ; X scanf("%f",&P->AA) ; readln(stdin) ; X P->BD = P->AD = 1 - P->AA ; X P->BB = P->AA ; X X P->D = P->H = P->V = P->A = P->B = 0.2 ; X printf("Parameters read in:\n") ; putchar('\n') ; X FS_printparameters(P) ; X} X XReadParameters(Machine) X int Machine ; X{ X switch (Machine) { X case ONESTATE : X case ONESTATER0 : X ReadOSParameters(&OSParameters) ; X break ; X case THREESTATE : X case THREESTATER0 : X ReadTSParameters(&TSParameters) ; X break ; X case FIVESTATE : X ReadFSParameters(&FSParameters) ; X break ; X case ALL : X ReadOSParameters(&OSParameters) ; X ReadTSParameters(&TSParameters) ; X ReadFSParameters(&FSParameters) ; X break ; X } X} X XPromptParameters(Machine,mode) X int Machine, *mode ; X{ X int c ; X X do { X printf("\nInitial parameters -- do you want to :\n") ; X printf(" 1. Do adaptive coding on 1st run, or\n") ; X printf(" 2. Use default parameters, or\n") ; X printf(" 3. Input your own\n") ; X printf(" Input option : ") ; X c = getc(stdin); readln(stdin) ; X if ((Machine == ONESTATER0 || Machine == THREESTATER0) && c == '1') { X printf("Options 1 not valid...\n") ; X c = '4' ; X } X } while ((c < '1') || (c > '3')) ; X X switch (c) { X case '1' : *mode = adaptive ; X if (Machine == ALL) { X InitParameters(ONESTATER0) ; X InitParameters(THREESTATER0) ; X } X break ; X case '2' : *mode = fixed ; X InitParameters(Machine) ; X break ; X case '3' : *mode = fixed ; X ReadParameters(Machine) ; X break ; X } X} X X XRunMachines(Machine,mode) X int Machine, mode ; X{ X float Null_ML,TS_ML,FS_ML,OS_ML, OSR0_ML, TSR0_ML ; X align Alignment[MAXALIGNLENGTH] ; X int Alignlength ; X float OSmatrix, TSmatrix, FSmatrix ; X int sumlength ; X int Niteration ; X X sumlength = A.length+B.length ; X X if (Machine == ONESTATE || Machine == ALL) { X printf("\nONE-STATE MACHINE :\n") ; X OneStateML(&A,&B,&OS_ML,&OSmatrix,mode,OSoutfile,&OSParameters,Accurarcy); X printf("1-State Machine: %1.4f(bits/symb) %6.2f(ML) %6.2f(Matrix)\n\n", OS_ML/sumlength,OS_ML,OSmatrix) ; X fflush(stdout) ; X } X X if (Machine == THREESTATE || Machine == ALL) { X printf("\nTHREE-STATE MACHINE :\n"); X ThreeStateMLS(&A,&B,&TS_ML,&TSmatrix,mode,TSoutfile,&TSParameters,Accurarcy); X printf("3-State Machine: %1.4f(bits/symb) %6.2f(ML) %6.2f(Matrix)\n\n", TS_ML/sumlength,TS_ML,TSmatrix) ; X fflush(stdout) ; X } X X if (Machine == FIVESTATE || Machine == ALL) { X printf("\nFIVE-STATE MACHINE :\n") ; X FiveState2ML(&A,&B,&FS_ML,&FSmatrix,mode,FSoutfile,&FSParameters,Accurarcy); X printf("5-State Machine: %1.4f(bits/symb) %6.2f(ML) %6.2f(Matrix)\n\n", FS_ML/sumlength,FS_ML,FSmatrix) ; X fflush(stdout) ; X } X X if (Machine == ONESTATER0 || Machine == ALL) { X printf("\nONE-STATE MACHINE :\n") ; X ROptAlignString(&A,&B,Alignment,&Alignlength,&OSR0_ML,&Niteration,&OSR0Parameters,&DNASymbol) ; X X PrintAlignment(Alignment, Alignlength, A.sequence, B.sequence, &DNASymbol) ; X printf("1-State Machine best alignment: %1.4f(bits/symb) %6.2f(ML) \n\n", OSR0_ML/sumlength,OSR0_ML) ; X fflush(stdout) ; X } X X if (Machine == THREESTATER0 || Machine == ALL) { X printf("\nTHREE-STATE MACHINE best alignment:\n"); X ThreeStateR0ML(&A,&B,Alignment,&Alignlength,&TSR0_ML,TSR0Parameters) ; X X PrintAlignment(Alignment, Alignlength, A.sequence, B.sequence, &DNASymbol) ; X printf("3-State Machine best alignment: %1.4f(bits/symb) %6.2f(ML)\n\n", TSR0_ML/sumlength,TSR0_ML,TSmatrix) ; X fflush(stdout) ; X } X X Null_ML = NullMesgLength(A.length,B.length) ; X X if (Machine == ALL) { X printf("\nSUMMARY\n") ; X printf("-------\n") ; X printf("message length (bits/symbol) :\n") ; X printf("Null: %1.3f OS: %1.3f(%1.3f) TS: %1.3f(%1.3f) FS: %1.3f(%1.3f) OS-R0: %1.3f TS-R0: %1.3f\n", X Null_ML/sumlength, OS_ML/sumlength, OSmatrix/sumlength, X TS_ML/sumlength,TSmatrix/sumlength, X FS_ML/sumlength,FSmatrix/sumlength, X OSR0_ML/sumlength,TSR0_ML/sumlength) ; X putchar('\n') ; X X printf("message length (bits) :\n") ; X printf("Null: %4.0f OS: %4.0f(%4.0f) TS: %4.0f(%4.0f) FS: %4.0f(%4.0f) OS-R0: %4.0f TS-R0: %4.0f\n", X Null_ML, OS_ML, OSmatrix, TS_ML,TSmatrix, FS_ML,FSmatrix,OSR0_ML,TSR0_ML) ; X } else X printf("Null Theory : %1.3f(bits/symb) %4.0f(ML)\n",Null_ML/sumlength,Null_ML) ; X} X Xmain() X{ X int Machine,mode ; X X PromptForStrings() ; X Machine = PromptMachinesToRun() ; X PromptOutputFile(Machine) ; X PromptParameters(Machine,&mode) ; X RunMachines(Machine,mode) ; X} END_OF_driver.c if test 11818 -ne `wc -c