Биология → Областная → 2018 → 11 класс | BeyondOlympiads

Параллельно, я сделал функцию, которая считает по твоему точному результату:

from math import sqrt 
def recurrent(n):
    denominator = 7 * (3 + sqrt(21)) * 4**n
    term1 = 2*sqrt(21)*(3/2-sqrt(21)/2)**n
    term2 = 2*sqrt(21)*(3/2-sqrt(21)/2)**(n-1)
    term3 = (21+5*sqrt(21))*(3/2+sqrt(21)/2)**n
    term4 = (21+5*sqrt(21))*(3/2+sqrt(21)/2)**(n-1)
    numerator = term1 + term2 + term3 + term4
    return numerator/denominator

def solve(n):
    return 1 - recurrent(n)

# arman = sum(res:=[solve(100) for i in range(10)])/len(res)
# print(arman)

Получаю 0.9950793852734019 (официальное решение / то, что указала Балжан дает 0.999999245)

Дальше, я запустил Монте-Карло на задачу.

Cначала написал функцию проверки наличия фрагмента в последовательности

def isPresent(sequence, fragment):
    '''
        Inputs:
            - sequence (str): a DNA sequence (or just a general string)
            - fragment (str): two chars for which we perform the check
        
        Output:
            bool - whether el is present in sequence or not
    '''
    # TO DO: generalize function to general fragment???
    last = None
    for nucleotide in sequence:
        if not last:
            last = nucleotide
            continue
        if last + nucleotide == fragment:
            return True
        else:
            last = nucleotide

    return False

# SMAL DEBUG
# t1 = 'CATG'
# t2 = 'CGGT'
# t3 = 'CAGATGC'
# t4 = 'CAGATGG'

# for t in [t1, t2, t3, t4]:
#     print(isPresent(t, 'GG'))

Дальше строим рандомную цепочку

import random
NUCLEOTIDES = ['A', 'G', 'C', 'T']

def build_sequence(elts, length):
    '''
        builds a string composed randomly from elts of given length
    '''
    seq = ''
    for _ in range(length):
        seq += random.choice(elts)
    return seq

И наконец сама Монте Карло. Строим 1000 последовательностей десять раз:


def estimate_probability(length, elts, fragment, iterations):
    '''
        In N iterations, estimates probability that fragment is present in a random sequence of length made from elts
    '''
    fragmentPresent = 0
    fragmentNotPresent = 0
    for i in range(iterations):
        print(f"building seq {i} / {iterations}")
        randomSequence = build_sequence(elts, length)
        if isPresent(randomSequence, fragment):
            fragmentPresent += 1
        else:
            fragmentNotPresent += 1
    return (fragmentPresent)/(fragmentPresent + fragmentNotPresent)

def average_probability(length, elts, fragment, iterations, runs):
    '''
        Perform N runs of estimate_probability
    '''
    return sum(probs:=[estimate_probability(length, elts, fragment, iterations) for _ in range(runs)])/len(probs)

length = 100
monteCarlo = average_probability(length, NUCLEOTIDES, 'GG', 1000, 10)
theory = theory.solve(length)

print(f"\n{monteCarlo} (MonteCarlo)\n{theory} (Theory)\n{theory-monteCarlo} (diff)\n")

В итоге, получаем

0.9953999999999998 (MonteCarlo)
0.9950793852734019 (Theory)
-0.0003206147265979453 (diff)
7 лайков