#!/usr/bin/python import random, math # Determine the score of the optimal global alignment of two sequences def globalAlignmentScore(s1, s2): # Initialize variables MATCH = 5 MISMATCH = -4 GAP = -6 NUM_ROWS = len(s2)+1 NUM_COLS = len(s1)+1 # Create table and fill it with zeros table = createTable(NUM_ROWS, NUM_COLS) col = 0 while (col < NUM_COLS): # Fill in first row of table table[0][col] = col * GAP col = col + 1 row = 0 while (row < NUM_ROWS): # Fill in first column of table table[row][0] = row * GAP row = row + 1 row = 1 while (row < NUM_ROWS): # Fill in each row of table col = 1 while (col < NUM_COLS): # Fill in column for each row left = table[row][col-1] + GAP above = table[row-1][col] + GAP charAlign = 0 if (s1[col-1] == s2[row-1]): charAlign = MATCH else: charAlign = MISMATCH diagonal = table[row-1][col-1] + charAlign table[row][col] = max3(left, above, diagonal) col = col + 1 row = row + 1 return table[NUM_ROWS-1][NUM_COLS-1] # Create a table with the given number of rows and columns def createTable(NUM_ROWS, NUM_COLS): table = [] row = 0 while (row < NUM_ROWS): table.append([]) col = 0 while (col < NUM_COLS): table[row].append(0) col = col + 1 row = row + 1 return table # Determine the maximum of 3 numbers def max3(a, b, c): if ((a >= b) and (a >= c)): return a if ((b >= a) and (b >= c)): return b return c # GENERATE ONE OF THE FOUR NUCLEOTIDES AT RANDOM, BASED # ON THE FOUR PARAMETER PROBABILITIES def generateRandomNucleotide(A_prob, C_prob, G_prob, T_prob): r = random.random() # Generate random number between 0.0 and 1.0 if (r < A_prob): return 'A' if (r < (A_prob + C_prob)): return 'C' if (r < (A_prob + C_prob + G_prob)): return 'G' if (r < (A_prob + C_prob + G_prob + T_prob)): return 'T' print ("Error - the four probabilities do not sum to 1.0") return 'X' # GENERATE A RANDOM SEQUENCE OF THE GIVEN LENGTH def generateRandomSequence(length, A_prob, C_prob, G_prob, T_prob): s = "" index = 0 while (index < length): s = s + generateRandomNucleotide(A_prob, C_prob, G_prob, T_prob) index = index + 1 return s # GENERATE ARANDOM SEQUENCE OF THE SAME LENGTH AS "s" AND OF THE # SAME EXPECTED NUCLEOTIDE DISTRIBUTION AS "s" def generateComparableRandomSequence(s): percent_A = float(s.count('A')) / float(len(s)) percent_C = float(s.count('C')) / float(len(s)) percent_G = float(s.count('G')) / float(len(s)) percent_T = float(s.count('T')) / float(len(s)) comparableRandomSequence = generateRandomSequence(len(s), percent_A, percent_G, percent_C, percent_T) return comparableRandomSequence # FOR TWO SEQUENCES, CALCULATE THE DISTANCE SCORE "D", WHERE # "D" IS DEFINED AS 100.0*(-LN(S_norm)). THE NATURAL LOGARITHM, LN, # OF A NUMBER "x" CAN BE CALCULATED IN PYTHON AS math.log(x). # # S_norm IS DEFINED AS # S_norm = (S_global - S_rand) / (S_iden - S_rand) # # S_global IS THE OPTIMAL GLOBAL ALIGNMENT SCORE OF THE TWO SEQUENCES. # # S_iden IS THE AVERAGE OF THE OPTIMAL GLOBAL ALIGNMENT SCORE OF s1 # ALIGNED WITH s1 AND THE OPTIMAL GLOBAL ALIGNMENT SCORE OF s2 # ALIGNED WITH s2. # # S_rand IS THE AVERAGE OF 1000 OPTIMAL GLOBAL ALIGNMENT SCORES. EACH # OF THE 1000 GLOBAL ALIGNMENT SCORES IS CALCULATED BY ALIGNING TWO # RANDOMLY GENERATED SEQUENCES, THE FIRST BEING OF THE SAME LENGTH AND # SAME EXPECTED NUCLEOTIDE COMPOSITION AS s1, AND THE SECOND BEING OF # THE SAME LENGTH AND SAME EXPECTED NUCLEOTIDE COMPOSITION AS s2. def calculateDistanceScore(s1, s2): # Enter your code here! D = 0.0 return D ######################################################## ### End of functions ################################### ######################################################## seq1 = "CGATAGTGCTATATCTAGCGCCGTCTAGATGCATTATACGATATCG" seq2 = "AACGACATGGCTCGTGCTATTACGCGCGAATATCC" seq3 = "ATAGTGCTATACTCGTGCTATTCTAGATGCCGCGATATAT" seq4 = "GGATAGGCTATATCTAGCGCGTCTAGATGCATTTACGATATC" seq5 = "TACGACATGCGCTCGTGCATATTAGCGCGCGATATATCG"