######################################################################
##### READ IN FASTA FILE AND REPORT NON-NUCLEOTIDE CHARACTERS ########
######################################################################
"""Check a file for a FASTA header and report non-nucleotide characters.

The script reads ``protein.txt``, reports whether its first line starts
with '>', joins the remaining lines into one sequence string, and prints
every character of that sequence that is not one of A, C, G or T.

The ``print(...)`` calls take a single argument so the script behaves the
same under Python 2 and Python 3.
"""

# Characters accepted as nucleotides. The check is case-sensitive, so
# lower-case letters are reported as unrecognized.
NUCLEOTIDES = "ACGT"

# Input file read when the script is run without further configuration.
# NOTE(review): the name says "protein" but the check below is for
# nucleotides -- confirm which kind of sequence is intended.
DEFAULT_PATH = "protein.txt"

# Message prefix kept byte-for-byte, including its trailing space.
UNRECOGNIZED_PREFIX = "I don't recognize the character: "


def is_fasta_header(header_line):
    """Return True if *header_line* starts with the FASTA marker '>'.

    An empty string (e.g. the first line of an empty file) yields False
    instead of raising IndexError, as indexing ``header_line[0]`` would.
    """
    return header_line.startswith(">")


def strip_line_breaks(text):
    """Return *text* with every line feed and carriage return removed."""
    # "\r" is removed as well as "\n" so CRLF line endings do not leave
    # stray carriage returns that would be reported as unrecognized.
    return text.replace("\r", "").replace("\n", "")


def read_fasta(path):
    """Read *path* and return ``(header_line, sequence)``.

    ``header_line`` is the first line exactly as read (empty for an empty
    file); ``sequence`` is the rest of the file with line breaks removed.
    Errors raised by ``open`` propagate to the caller.
    """
    # The context manager closes the file even if reading fails.
    with open(path) as handle:
        header_line = handle.readline()
        remainder = handle.read()
    return header_line, strip_line_breaks(remainder)


def find_unrecognized_characters(sequence, alphabet=NUCLEOTIDES):
    """Return the characters of *sequence* not in *alphabet*, in order.

    Repeated characters appear once per occurrence.
    """
    return [character for character in sequence if character not in alphabet]


def main(path=DEFAULT_PATH):
    """Print the FASTA-format verdict and each unrecognized character."""
    header_line, sequence = read_fasta(path)

    if is_fasta_header(header_line):
        print("The file is in FASTA format.")
    else:
        # The sequence is still scanned after this message.
        print("The file is not in FASTA format.")

    for character in find_unrecognized_characters(sequence):
        # The extra " " reproduces the separator that the Python 2
        # statement ``print a, b`` inserted between its two items.
        print(UNRECOGNIZED_PREFIX + " " + character)


if __name__ == "__main__":
    main()