Параллельно, я сделал функцию, которая считает по твоему точному результату:
from math import sqrt
def recurrent(n):
denominator = 7 * (3 + sqrt(21)) * 4**n
term1 = 2*sqrt(21)*(3/2-sqrt(21)/2)**n
term2 = 2*sqrt(21)*(3/2-sqrt(21)/2)**(n-1)
term3 = (21+5*sqrt(21))*(3/2+sqrt(21)/2)**n
term4 = (21+5*sqrt(21))*(3/2+sqrt(21)/2)**(n-1)
numerator = term1 + term2 + term3 + term4
return numerator/denominator
def solve(n):
return 1 - recurrent(n)
# arman = sum(res:=[solve(100) for i in range(10)])/len(res)
# print(arman)
Получаю 0.9950793852734019 (официальное решение / то, что указала Балжан дает 0.999999245)
Дальше, я запустил Монте-Карло на задачу.
Cначала написал функцию проверки наличия фрагмента в последовательности
def isPresent(sequence, fragment):
'''
Inputs:
- sequence (str): a DNA sequence (or just a general string)
- fragment (str): two chars for which we perform the check
Output:
bool - whether el is present in sequence or not
'''
# TO DO: generalize function to general fragment???
last = None
for nucleotide in sequence:
if not last:
last = nucleotide
continue
if last + nucleotide == fragment:
return True
else:
last = nucleotide
return False
# SMAL DEBUG
# t1 = 'CATG'
# t2 = 'CGGT'
# t3 = 'CAGATGC'
# t4 = 'CAGATGG'
# for t in [t1, t2, t3, t4]:
# print(isPresent(t, 'GG'))
Дальше строим рандомную цепочку
import random
NUCLEOTIDES = ['A', 'G', 'C', 'T']
def build_sequence(elts, length):
'''
builds a string composed randomly from elts of given length
'''
seq = ''
for _ in range(length):
seq += random.choice(elts)
return seq
И наконец сама Монте Карло. Строим 1000 последовательностей десять раз:
def estimate_probability(length, elts, fragment, iterations):
'''
In N iterations, estimates probability that fragment is present in a random sequence of length made from elts
'''
fragmentPresent = 0
fragmentNotPresent = 0
for i in range(iterations):
print(f"building seq {i} / {iterations}")
randomSequence = build_sequence(elts, length)
if isPresent(randomSequence, fragment):
fragmentPresent += 1
else:
fragmentNotPresent += 1
return (fragmentPresent)/(fragmentPresent + fragmentNotPresent)
def average_probability(length, elts, fragment, iterations, runs):
'''
Perform N runs of estimate_probability
'''
return sum(probs:=[estimate_probability(length, elts, fragment, iterations) for _ in range(runs)])/len(probs)
length = 100
monteCarlo = average_probability(length, NUCLEOTIDES, 'GG', 1000, 10)
theory = theory.solve(length)
print(f"\n{monteCarlo} (MonteCarlo)\n{theory} (Theory)\n{theory-monteCarlo} (diff)\n")
В итоге, получаем
0.9953999999999998 (MonteCarlo)
0.9950793852734019 (Theory)
-0.0003206147265979453 (diff)