import random, math # Determine the score of the optimal local alignment of two sequences def localAlignmentScore(s1, s2): # Initialize variables MATCH = 5 MISMATCH = -4 GAP = -6 NUM_ROWS = len(s2)+1 NUM_COLS = len(s1)+1 # Create table and fill it with zeros table = createTable(NUM_ROWS, NUM_COLS) col = 0 while (col < NUM_COLS): # Fill in first row of table table[0][col] = 0 col = col + 1 row = 0 while (row < NUM_ROWS): # Fill in first column of table table[row][0] = 0 row = row + 1 maxInTable = -1 # Keep track of maximum score in table row = 1 while (row < NUM_ROWS): # Fill in each row of table col = 1 while (col < NUM_COLS): # Fill in column for each row left = table[row][col-1] + GAP above = table[row-1][col] + GAP charAlign = 0 if (s1[col-1] == s2[row-1]): charAlign = MATCH else: charAlign = MISMATCH diagonal = table[row-1][col-1] + charAlign table[row][col] = max4(left, above, diagonal, 0) # Keep track of the maximum score in the table if (table[row][col] > maxInTable): maxInTable = table[row][col] col = col + 1 row = row + 1 return maxInTable # Create a table with the given number of rows and columns def createTable(NUM_ROWS, NUM_COLS): table = [] row = 0 while (row < NUM_ROWS): table.append([]) col = 0 while (col < NUM_COLS): table[row].append(0) col = col + 1 row = row + 1 return table # Determine the maximum of 3 numbers def max3(a, b, c): if ((a >= b) and (a >= c)): return a if ((b >= a) and (b >= c)): return b return c # Determine the maximum of 4 numbers def max4(a, b, c, d): return max3(max3(a, b, c), d, d) # ADD VALUE TO HISTOGRAM def addToHistogram(histogram, value): # If histrogram is empty, create histogram and initialize all entries to zero SIZE = 100 if (len(histogram) == 0): index = 0 while (index < SIZE): histogram.append(0) index = index + 1 # Add value to histogram if (value < 0): value = 0 if (value >= SIZE): value = SIZE - 1 histogram[value] = histogram[value] + 1 # PRINT OUT HISTOGRAM def printHistogram(histogram): index = 0 while (index < len(histogram)): print index, "\t", histogram[index] index = index + 1 # CALCULATE MEAN VALUE OF HISTOGRAM def mean(histogram): sum = 0 # Sum of all scores in histogram count = 0 # Number of elements in histogram index = 0 while (index < len(histogram)): sum = sum + index * histogram[index] count = count + histogram[index] index = index + 1 return float(sum) / float(count) # CALCULATE THE STANDARD DEVIATION OF THE HISTOGRAM def standardDeviation(histogram): average = mean(histogram) # Determine the mean of the histogram stddev = 0 # Sum of deviations from mean count = 0 # Number of elements in histogram index = 0 while (index < len(histogram)): stddev = stddev + histogram[index] * (index - average) * (index - average) count = count + histogram[index] index = index + 1 return math.sqrt(float(stddev) / float(count)) # PRINT THE VALUE OF THE LOCATION PARAMETER, MU def printLocationParameter(histogram): mu = mean(histogram) - 0.5772 * printScaleParameter(histogram) print "Location parameter, mu:", mu return mu # PRINT THE VALUE OF THE SCALE PARAMETER, BETA def printScaleParameter(histogram): beta = standardDeviation(histogram) * math.sqrt(6.0) / math.pi print "Scale parameter, beta:", beta return beta # REPEATEDLY GENERATE A PAIR OF RANDOM SEQUENCES, ALIGN THEM, # AND ADD OPTIMAL LOCAL ALIGNMENT SCORE TO HISTOGRAM def alignManyRandomSequences(numberOfAlignments): # This function needs to be re-written by YOU. # The function currently populates a histogram with random # numbers. You should modify the function so that instead # of populating the histogram with random numbers, the # histogram is populated by the optimal score of locally # aligning two random DNA sequences, each of length 30nt. histogram = [] index = 0 while (index < numberOfAlignments): score = random.randint(0, 99) # Generate random number 0 to 99 addToHistogram(histogram, score) # Add number to histogram index = index + 1 printHistogram(histogram) ######################################################## ### End of functions ################################### ######################################################## alignManyRandomSequences(1000)