Full Likelihood Inference Procedure based on SFS

³⁹⁰ views

Full Likelihood Inference from the Site Frequency Spectrum of a Non-recombining Locus

Raazesh Sainudiin and Amandine Véber

This is a public code accompanying the research article:

Full Likelihood Inference from the Site Frequency Spectrum based on the Optimal Tree Resolution, Raazesh Sainudiin and Amandine Véber, 2018

The Sampler

%auto
import pylab
import numpy as np

# set random seed of the random number generator
set_random_seed(0)
np.random.seed(int(1235))

# updating the topology

def DProduct(j,betaSpl):
    '''compute the product terms appearing in the probability of a split'''
    if j<2:
        prod=1
    else:
        x=j+betaSpl
        prod=x
        for k in range(1,j-1):
            prod= prod*(x-k)
    return prod

def BetaSplit(betaSpl,m,J):
    '''indicator of m being split into J(>= m/2) and m-J conditionally on no previous split, and the corresponding event probability, under the Beta-splitting model'''
    if (J==ceil(m/2)):
        I=1
        w=1
    else:
        mm = floor(m/2)
        if(betaSpl==0):
            #Kingman case
            D = sum(2/(m-1) for k in range(mm+1,J+1))
            if (m%2==0):
                D = D + 1/(m-1)
            p=(2/(m-1))/D
        else:
            D = 2*(sum(binomial(m,k)* DProduct(k,betaSpl)* DProduct(m-k,betaSpl) for k in range(mm+1,J+1)))
            if (m%2==0):
                D = D+  binomial(m,mm)* ((DProduct(mm,betaSpl))**2)
            p = 2*(binomial(m,J))*DProduct(J,betaSpl)*DProduct(m-J,betaSpl)/D
        U = np.random.random_sample()
        if (U<p):
            I=1
            w=p
        else:
            I=0
            w=1-p
    return [I,w]

def UncondSplit(betaSpl,m,J):
    '''compute the probability that m is split into J>=m/2 and m-J in Beta-splitting model without calling the beta functions'''
    if(betaSpl==0):
        #Kingman case
        if(2*J==m and m%2==0):
            p=1/(m-1)
        else:
            p=2/(m-1)
    else:
        mm= floor(m/2)
        D = 2*(sum(binomial(m,k)*DProduct(k,betaSpl)*DProduct(m-k,betaSpl) for k in range(mm+1,m)))
        if (m%2==0):
            aux2 = binomial(m,mm)* ((DProduct(mm,betaSpl))**2)
            D= D+ aux2
        N= binomial(m,J)* DProduct(J,betaSpl)* DProduct(m-J,betaSpl)
        if (2*J==m):
            p=N/D
        else:
            p=2*N/D
    return p

def IndexSplit(n,k,V):
    '''index of largest block if larger than k, according to n-vector V'''
    m=k-1
    for i in range(n-1,k-1,-1):
        if (V[i]>0):
            m=i
            break
    return m

def Sstep(betaSpl,n,j,C,F,w):
    '''column filling for j less than n/2 in Beta-splitting model, with control sequence C and proposal weight w. F[k,0] gives the size of the largest edge created by the split between epochs k-1 and k, F[k,n] gives the size of the edge split between epochs k-1 and k.'''
    J=n-j
    F[2,n]=n
    if (sum(F[2,J+1:n])==1):
        F[2,J]=0
    else:
        if (  (C[J]>0) or ( (j==floor(n/2)) and (n%2 == 1) )  ):
            F[2,J]=1
        else:
            A= BetaSplit(betaSpl,n,J)
            F[2,J]= A[0]
            w= w*(A[1])
    if F[2,J]>0:
        F[2,0]=J
        C[J]=0
    for k in range(3,j+2):
        if (sum(F[k,J+1:n])>0):
            F[k,J]=0
        else:
            m= IndexSplit(n,J,F[k-1,0:n+1])
            if (C[J]>0):
                F[k,J]=1
                F[k,n]=m
                C[J]=0
                if (F[k-1,J]==0):
                    F[k,0]=J
            else:
                if (m<J):
                    F[k,J]=0
                elif (m==J):
                    U=np.random.uniform()
                    q= (m-1)/(n-k+1)
                    if (U< q):
                        F[k,J]=0;
                        F[k,n]=J;
                        w= w*q
                    else:
                        F[k,J]=1
                        w=w*(1-q)
                elif (J==ceil(m/2)):
                    F[k,J]=1
                else:
                    F[k,n]=m
                    A=BetaSplit(betaSpl,m,J)
                    F[k,J]=A[0]
                    if (F[k,J]==1):
                        F[k,0]=J
                    w= w*(A[1])
    return [F,w]

def Hstep(betaSpl,n,C,F,w):
    '''column filling for j=n/2, n even and j>1'''
    j=floor(n/2)
    F[2,n]=n
    if (sum(F[2,j+1:n])==0):
        F[2,j]=2
        F[2,0]=j
        C[j]=0
    else:
        F[2,j]=0
    if (F[2,j]==0):
        for k in range(3,j+2):
            if (sum(F[k,j+1:n])>0):
                F[k,j]=0
            else:
                m= IndexSplit(n,j,F[k-1,0:n+1])
                if (C[j]>0):
                    F[k,j]=1
                    F[k,n]=m
                    C[j]=0
                    if (F[k-1,j]==0):
                        F[k,0]=j
                else:
                    if (m<j):
                        F[k,j]=0
                    elif (m==j):
                        U=np.random.uniform()
                        q= (m-1)/(n-k+1)
                        if (U<q):
                            F[k,j]=0
                            F[k,n]=j
                            w= w*q
                        else:
                            F[k,j]=1
                            w= w*(1-q)
                    elif (j==ceil(m/2)):
                        F[k,j]=1
                        F[k,0]=j
                    else:
                        F[k,n]=m
                        A = BetaSplit(betaSpl,m,j)
                        F[k,j]= A[0]
                        if (F[k,j]==1):
                            F[k,0]=j
                        w= w*(A[1])
    else:
        F[3,j]=1
        F[3,n]=j
        for k in range(4,j+2):
            m= IndexSplit(n,j,F[k-1,0:n+1])
            if (m<j):
                F[k,j]=0
            else:
                U=np.random.uniform()
                q= (m-1)/(n-k+1)
                if (U<q):
                    F[k,j]=0
                    F[k,n]=m
                    w= w*q
                else:
                    F[k,j]=1
                    w= w*(1-q)
    return [F,w]

def Lstep(betaSpl,n,j,C,F,w):
    '''column filling for j larger than n/2 in Beta-splitting model, with control sequence C'''
    J=n-j
    F[2,n]=n
    F[2,J]=F[2,j]
    if F[2,J]>0:
        C[J]=0
    for k in range(3,j+2):
        m=0
        for i in range(J+1,n):
            if F[k-1,i]>F[k,i]:
                m=i
                break
        if (m==0):
            if ((n-pylab.dot(pylab.arange(J,n,1),F[k-1,J:n]))-k+1+sum(F[k-1,J:n])==0):
                F[k,J]=F[k-1,J]-1
                F[k,n]=J
            elif ( (C[J]==0) or (sum(F[k,J+1:n])>0) or (F[k-1,J]>1) ):
                if (F[k-1,J]==0):
                    F[k,J]=0
                else:
                    U=np.random.uniform()
                    q= F[k-1,J]*(J-1)/(n-k+1-sum(F[k,l]*(l-1) for l in range(J+1,n)))
                    if (U< q):
                        F[k,J]=F[k-1,J]-1
                        F[k,n]=J
                        w= w*q
                    else:
                        F[k,J]=F[k-1,J]
                        w=w*(1-q)
            else:
                F[k,J]=F[k-1,J]
        elif (m > 2*J):
            F[k,n]=m
            F[k,J]=F[k-1,J]+(F[k,m-J]-F[k-1,m-J])
        elif ( (m==2*J) and (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])==0) ):
            F[k,n]=m
            F[k,J]=F[k-1,J]+2
            F[k,0]=J
        elif ( (m <= 2*J) and  (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])>0) ):
            F[k,n]=m
            F[k,J]=F[k-1,J]
        elif ( (J==ceil(m/2)) and (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])==0) ):
            F[k,n]=m
            F[k,J]=F[k-1,J]+1
            F[k,0]=J
        elif ( (m < 2*J) and (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])==0) ):
            F[k,n]=m
            if ( (C[J]>0) and (F[k-1,J]==0) ):
                F[k,J]=1
                F[k,0]=J
            else:
                A= BetaSplit(betaSpl,m,J)
                F[k,J]=F[k-1,J] + A[0]
                if (A[0]==1):
                    F[k,0]=J
                w= w*(A[1])
    return [F,w]

def Twostep(betaSpl,n,C,F,w):
    '''column filling for j=n-2 in Beta-splitting model, with control sequence C useless here'''
    j=n-2
    F[2,n]=n
    F[2,2]=F[2,j]
    for k in range(3,j+2):
        m=0
        for i in range(3,n):
            if F[k-1,i]>F[k,i]:
                m=i
                break
        if (m==0):
            F[k,2]=F[k-1,2]-1
            F[k,n]=2
        elif (m > 4):
            F[k,n]=m
            F[k,2]=F[k-1,2]+F[k,m-2]-F[k-1,m-2]
        elif ( (m==4) and (F[k,3]-F[k-1,3]==0) ):
            F[k,n]=m
            F[k,2]=F[k-1,2]+2
            F[k,0]=2
        elif ( (m==4) and (F[k,3]-F[k-1,3]>0) ):
            F[k,n]=m
            F[k,2]=F[k-1,2]
        elif (m==3):
            F[k,n]=m
            F[k,2]=F[k-1,2]+1
            F[k,0]=2
    C[j]=0
    return [F,w]

def Onestep(betaSpl,n,C,F,w):
    '''column filling for j=n-1 in Beta-splitting model, with control sequence C useless here'''
    j=n-1
    F[2,n]=n
    F[2,1]=F[2,j]
    for k in range(3,j+1):
        m=0
        for i in range(3,n):
            if F[k-1,i]>F[k,i]:
                m=i
                break
        if (m>2):
            F[k,1]=F[k-1,1]+(F[k,m-1]-F[k-1,m-1])
            F[k,n]=m
        else:
            F[k,1]=F[k-1,1]+2
            F[k,0]=1
            F[k,n]=2
        F[n,1]=n
    F[n,n]=2
    F[n,0]=1
    C[j]=0
    return [F,w]


# Distributing the mutations

def GammaDensity(k,theta,x):
    '''Density of a Gamma(k,theta) at point x. theta inverse of the rate parameter in companion paper to comply with Python definition of gamma(a,b) distribution'''
    y= 1/(gamma(k)*(theta**k))
    z= y*(x**(k-1))*(exp(-x/theta))
    return z

def Mutate(A,n,j,s,theta,F,M,T,w1,w2):
    '''distribute the s mutations carried by n-j individuals using F and the information T on time already gathered. Updates at the same time the weight w1 associated with mutations and w2 associated with the times'''
    J=n-j
    if (s==0):
        M[0:n+1,J]= pylab.zeros(n+1)
    else:
        V=[(F[k,J]*T[k]) for k in range(0,n+1)]
        L= sum(V[2:j+2])
        W=[(V[k]/L) for k in range(0,n+1)]
        M[0:n+1,J]= np.random.multinomial(s,W)
        d = multinomial([int(i) for i in M[0:n+1,J]])
        dd = RR(d)
        for k in range(0,n+1):
            dd = dd * (RR(W[k])**(RR(M[k,J])))
        w1 = RR(w1)*dd
    for k in range (2,j+2):
        if F[k,J]>0:
            a=1+(sum(M[k,J:n]))
            b=1/(A[k]+theta*(sum(F[k,J:n])))
            T[k]= np.random.gamma( a, b )
            w2 = (w2) * (GammaDensity(RR(a),RR(b),RR(T[k])))
    return [M,T,w1,w2]


#Producing F and M from S (global algorithm)

def Control(n,S):
    '''build the control vector C from S, with sample size n'''
    C=pylab.zeros(n+1)
    for k in range(0,n+1):
        if (S[k]>0):
            C[k]=1
    return C

def BuildingFM(A,betaSpl,n,S,theta,F,M,T,w,w1,w2,k1,k2):
    '''modify topology F, mutation matrix M and epoch time vector T consistent with the SFS S, from column n-k1 to n-k2 in Beta-splitting model and with a priori rates A. Updates the weights w, w1 and w2 as well.'''
    C=Control(n,S)
    X=[F,w]
    Y=[M,T,w1,w2]
    if(n==2):
        X[0][2,0:3]=[1,2,2]
        C[1]=0
        Y=Mutate(A,n,1,S[1],theta,X[0],M,T,w1,w2)
    elif(n==3):
        if k1==1:
            X[0][2,0:4]=[2,1,1,3]
            C[2]=0
            Y=Mutate(A,n,1,S[2],theta,X[0],M,T,w1,w2)
        if k2==2:
            X[0][3,0:4]=[1,3,0,2]
            C[1]=0
            Y=Mutate(A,n,2,S[1],theta,X[0],Y[0],Y[1],Y[2],Y[3])
    else:
        K2=min(ceil(n/2)-1,k2)
        for j in range(k1,K2+1):
            X=Sstep(betaSpl,n,j,C,X[0],X[1])
            Y=Mutate(A,n,j,S[n-j],theta,X[0],Y[0],Y[1],Y[2],Y[3])
        if (n%2 == 0) and (k2 > K2) and (k1 <= floor(n/2)):
            j=floor(n/2)
            X=Hstep(betaSpl,n,C,X[0],X[1])
            Y=Mutate(A,n,j,S[j],theta,X[0],Y[0],Y[1],Y[2],Y[3])
        K1=max(k1,floor(n/2)+1)
        K2=min(k2,n-3)
        for j in range(K1,K2+1):
            X=Lstep(betaSpl,n,j,C,X[0],X[1])
            Y=Mutate(A,n,j,S[n-j],theta,X[0],Y[0],Y[1],Y[2],Y[3])
        if (k1 < n-1) and (k2 > n-3):
            X=Twostep(betaSpl,n,C,X[0],X[1])
            Y=Mutate(A,n,n-2,S[2],theta,X[0],Y[0],Y[1],Y[2],Y[3])
        if (k2 == n-1):
            X=Onestep(betaSpl,n,C,X[0],X[1])
            Y=Mutate(A,n,n-1,S[1],theta,X[0],Y[0],Y[1],Y[2],Y[3])
    return [X[0],Y[0],Y[1],X[1],Y[2],Y[3]]

def MakeHistory(A,betaSpl,n,S,theta):
    '''return SFS-history (F,M,T) compatible with SFS S, and proposal weights (w,w1,w2). n sampling size, A vector of a priori rates for epoch times, betaSpl a priori parameter of Beta-splitting model, theta scaled mutation rate'''
    M = pylab.zeros((n+1,n+1))
    F = pylab.zeros((n+1,n+1))
    T = pylab.zeros(n+1)
    for k in range(0,n+1):
        if (A[k]==0):
            T[k]=1
        else:
            T[k]=np.random.exponential(1/A[k])
    k1=1
    k2=n-1
    X=BuildingFM(A,betaSpl,n,S,theta,F,M,T,1,1,1,k1,k2)
    return X

def MakeHistory0(A,betaSpl,n,theta):
    '''return F and M-matrices independent of the mutation, and times sampled according to the a priori coalescence rates'''
    S = pylab.zeros(n+1)
    M = pylab.zeros((n+1,n+1))
    F = pylab.zeros((n+1,n+1))
    T = [1 for k in range(0,n+1)]
    k1=1
    k2=n-1
    X= BuildingFM(A,betaSpl,n,S,theta,F,M,T,1,1,1,k1,k2)
    MM = pylab.zeros((n+1,n+1))
    TT = pylab.zeros(n+1)
    for k in range(0,n+1):
        if (A[k]==0):
            TT[k]=1
        else:
            TT[k]=np.random.exponential(1/A[k])
    return [X[0],MM,TT,X[3],1.,ProbOfTHom(A,TT,n)]

Simulating Data

%auto
#Making SFS from F and T

def MakeSFS(F,T,theta,n):
    '''create SFS out of L=TF and theta = per locus scaled mutation rate'''
    S=pylab.zeros(n+1)
    L=pylab.dot(T,F)
    for i in range(1,n):
        S[i]=np.random.poisson(theta*L[i])
    return S


#Producing epoch times

def KingmanRates(n):
    '''return Kingman rates'''
    return pylab.array([0.,0.]+[k*(k-1)/2.0 for k in range(2,n+1)])

def EpochtimeConst(R,N0):
    '''vector of exponential times with rates given in vector R and constant pop size N0'''
    return [np.random.exponential(N0/r) for r in R]

def EpochtimeExpGr(Rk,N0,g,s):
    '''return a sample of time Tk of epoch k with sample-size dependent rate Rk,
       starting from time s, in an exponentially growing population with growth rate g from
       initial pop size N0'''
    if g==0:
        return np.random.exponential(N0/Rk)
    else:
        return (1./g)*log(1.0 - (exp(-g*s)*N0*g*(log(np.random.uniform())))/Rk)

def EpochTimesExpGr(n,R,N0,g):
    '''return a sample vector of inhomogenous exponential RVs, with sample-size dependent
       epoch-specific coalescent rates given by vector R in an exponentially growing population
       with growth rate g from initial pop size N0'''
    t=pylab.zeros(n+1)
    t[n]=EpochtimeExpGr(R[n],N0,g,0)
    for k in range(n-1,1,-1):
        t[k]=EpochtimeExpGr(R[k],N0,g,t[n:k+1:-1].sum())
    return t

def EpochtimeBtl(k,N0,a,b,eps,s):
    '''return a sample of time Tk of epoch k starting from time s, under Kingman coalescent with pop size eps times N0 between times a and b, and N0 otherwise'''
    if (s>=b):
        return np.random.exponential(2*N0/(k*(k-1)))
    else:
        U= np.random.uniform()
        if (s>a):
            if ( U < 1- exp(-k*(k-1)*(b-s)/( 2*eps*N0 )) ):
                return -2*eps*N0*log(1-U)/( k*(k-1) )
            else:
                return -2*N0*log(1-U)/( k*(k-1) ) - (b-s)*(1/eps - 1)
        else:
            if ( U < 1- exp(-k*(k-1)*(a-s)/(2*N0)) ):
                return -2*N0*log(1-U)/(k*(k-1))
            elif ( U < 1- exp(- k*(k-1)*( (b-a)/eps + a-s )/(2*N0))):
                return eps*(-2*N0*log(1-U)/(k*(k-1)) + (a-s)*(1/eps - 1) )
            else:
                return -2*N0*log(1-U)/(k*(k-1)) - (b-a)*(1/eps - 1)

def EpochTimesBtl(n,N0,a,b,eps):
    '''return a sample vector of inhomogenous exponential RVs, under Kingman coalescent with pop size eps times N0 between times a and b, and N0 otherwise. n is sample size'''
    t=pylab.zeros(n+1)
    t[n]=EpochtimeBtl(n,N0,a,b,eps,0)
    for k in range(n-1,1,-1):
        t[k]=EpochtimeBtl(k,N0,a,b,eps,t[n:k+1:-1].sum())
    return t


#Creating SFS and associated pair (F,T) in classical scenarios

def MakeS(betaSpl,N0,g,mu,n):
    '''make SFS data S from given parameters and return T,F,S for exponential growth model of demography (g=0 is standard Kingman coalescent)'''
    T=EpochTimesExpGr(n,KingmanRates(n),N0,g)
    A = [1 for k in range(0,n+1)] # this is just dummy
    X = MakeHistory0(A,betaSpl,n,mu)
    S=MakeSFS(X[0],T,mu,n)
    return [T,X[0],S]

def MakeS_Btl(betaSpl,N0,a,b,eps,mu,n):
    '''make sfs data S from given parameters and return T,F,S for bottleneck model of demography'''
    T=EpochTimesBtl(n,N0,a,b,eps)
    A = [1 for k in range(0,n+1)] # this is just dummy
    X = MakeHistory0(A,betaSpl,n,mu)
    S=MakeSFS(X[0],T,mu,n)
    return [T,X[0],S]

Computing Probabilities and Likelihoods

%auto
# Probability of a Topology in Beta-splitting model

def ProbOfF(betaSpl,f,n):
    '''compute probability of topology F in (unconditioned) Beta-splitting model, given beta and sample size n'''
    f[1,n]=1
    pr=RR(1.0)
    for k in range(2,n+1):
        mk=floor(f[k,n])
        lk=floor(f[k,0])
        pr=pr*(f[k-1,mk]*(mk-1)/(n-k+1))*UncondSplit(betaSpl,mk,lk)
    return pr


# Densities of rate-parametrized epoch times

def ProbOfTHom(r,t,n):
    '''return the density of the time vector t=[t_2,...t_n] under coalescent model with epoch-dependent rates r(k)'''
    a2 = RR(1.0)
    for i in range(2,n+1):
        a2 = a2 * RR(r[i]) * exp(-(RR(t[i]) * RR(r[i])))
    return a2

def ProbOfTExG(g,t,n):
    '''return the density of the time vector t=[t_2,...t_n] under Kingman coalescent with exponentially growing population with growth rate g'''
    if(g==0):
        a=RR(1.0)
        for k in range(2,n+1,1):
            kC2=RR(k*(k-1)/2);
            a = a * kC2 * exp(-kC2*t[k])
    else:
        Tnk = RR(t[n]);
        a = RR(n*(n-1)/2 * exp(g*Tnk - n*(n-1)/(2*g) *  1 * (exp(g*t[n])-1) ) );
        for k in range(n-1,1,-1):
            kC2=RR(k*(k-1)/2);
            a = a * kC2 * exp(g*(Tnk+t[k]) - ( kC2/g * exp(g*(Tnk)) * (exp(g*t[k])-1) ) );
            Tnk=Tnk+t[k];
    return a

def ProbOfTBtl(a,b,eps,N0,t,n):
    '''return the density of the time vector t=[t_2,...,t_n] under the Kingman with bottleneck scenario where pop size is eps*N0 between times a and b, and N0 otherwise'''
    if (t[n]>= b):
        bin= RR(n*(n-1)/(2*N0))
        x= RR(bin*exp( - bin*((b-a)*(1/eps -1)+t[n]) ))
        for k in range(2,n):
            bink= RR(k*(k-1)/(2*N0))
            x = x* bink * exp(-bink*t[k])
    else:
        bart=pylab.zeros(n+2)
        bart[n]=t[n]
        for k in range(n-1,1,-1):
            bart[k]= bart[k+1]+t[k]
        K=2
        KK=2
        for k in range(n,1,-1):
            if (bart[k]>a):
                K=k+1
                break
        for k in range(K-1,1,-1):
            if (bart[k]>b):
                KK=k+1
                break
        if (K==KK) and (K>2):
            bin = RR((K-1)*(K-2)/(2*N0))
            x = RR(bin*exp( -bin*( (b-a)*(1/eps -1)+ t[K-1] ) ))
            for j in range(2,K-1):
                binj= j*(j-1)/(2*N0)
                x= x * RR(binj * exp(- binj * t[j]))
            for j in range(K,n+1):
                binj= j*(j-1)/(2*N0)
                x= x * RR(binj * exp(- binj*t[j]))
        elif (K==KK) and (K==2):
            x = ProbOfTHom(KingmanRates(n)/N0,t,n)
        elif (KK>2):
            bin1 = (K-1)*(K-2)/(2*N0)
            bin2 = (KK-1)*(KK-2)/(2*N0)
            x = RR((bin1/eps)*(bin2)*exp( -bin1*(a-bart[K] + (bart[K-1]-a)/eps) -bin2*( (b-bart[KK])/eps + bart[KK-1]-b ) ))
            for j in range(2,KK-1):
                binj= j*(j-1)/(2*N0)
                x= x* RR( binj*exp(-binj*t[j]) )
            for j in range(KK,K-1):
                binj= j*(j-1)/(2*eps*N0)
                x= x* RR( binj*exp(-binj*t[j]) )
            for j in range(K,n+1):
                binj= j*(j-1)/(2*N0)
                x= x* RR( binj*exp(-binj*t[j]) )
        else:
            bin1 = (K-1)*(K-2)/(2*N0)
            x = RR((bin1/eps)*exp( -bin1*(a-bart[K] + (bart[K-1]-a)/eps) ))
            for j in range(2,K-1):
                binj= j*(j-1)/(2*eps*N0)
                x= x* RR( binj*exp(-binj*t[j]) )
            for j in range(K,n+1):
                binj= j*(j-1)/(2*N0)
                x= x* RR( binj*exp(-binj*t[j]) )
    return x


# Conditional probabilities

def ProbOfMGivenFTmu(f,m,t,mu,n):
    '''return the P(M | F,T,mu), mu scaled mutation rate (theta earlier)'''
    a=RR(1.0)
    for j in range(1,n):
        for k in range(2,n+1):
            r=mu*RR(f[k,j])*RR(t[k])
            a=a*exp(-r)*r^(RR(m[k,j]))/factorial(RR(m[k,j]))
    return a

def ProbOfSfsGivenFTmu(f,s,t,mu,n):
    '''return the P(S | F,T,mu) of SFS S given topology F and epoch times T, mu scaled mutation rate'''
    a=RR(1.0)
    L=pylab.dot(t,f)
    for j in range(1,n):
        r=mu*RR(L[j])
        a=a*exp(-r)*r^(RR(s[j]))/factorial(RR(s[j]))
    return a

def ProbOfFMTGivenRatesBetaMu(f,m,t,r,betaSpl,mu,n):
    '''return P(F,M,T) given rates, beta, and mu in Beta-splitting model with epoch-wise homogeneous coalescence rates given by vector r'''
    return ProbOfF(betaSpl,f,n)*ProbOfTHom(r,t,n)*ProbOfMGivenFTmu(f,m,t,mu,n)

def ProbOfFMTGivenGrowthBetaMu(f,m,t,g,betaSpl,mu,n):
    '''return P(F,M,T) given exponential growth rate, beta, and mu in Beta-splitting model with exponential growth of pop size'''
    return ProbOfF(betaSpl,f,n)*ProbOfTExG(g,t,n)*ProbOfMGivenFTmu(f,m,t,mu,n)

def ProbOfFSfsTGivenGrowthBetaMu(f,s,t,g,betaSpl,mu,n):
    '''return P(F,S,T) given exponential growth rate, betaSpl, and mu'''
    return ProbOfF(betaSpl,f,n)*ProbOfTExG(g,t,n)*ProbOfSfsGivenFTmu(f,s,t,mu,n)

def ProbOfFSfsTGivenBottleneckBetaMu(f,s,t,a,b,eps,N0,betaSpl,mu,n):
    '''return P(F,S,T) given bottleneck model parameters: a,b,eps,N0, and betaSpl and mu'''
    return ProbOfF(betaSpl,f,n)*ProbOfTBtl(a,b,eps,N0,t,n)*ProbOfSfsGivenFTmu(f,s,t,mu,n)

# Multi-locus based likelihood of parameters under different scenarios

def LklOfBetaThetaGrowthRates(betaSpl,Mu,g,GrRates,n,sfsList,Reps):
    '''return the log likelihood across loci for beta, Mu and growth rate g, specifying a vector GrRates of a priori coalescence rates. sfsList is a list of SFS at all loci considered, Reps is the number of particles produced per locus to evaluate the per-locus likelihood.'''
    jointLklAcrossLoci=RR(1.0)
    jointLogLklAcrossLoci=RR(0.0)
    for i in range(len(sfsList)):
        sfs=sfsList[i]
        Lkl2=pylab.zeros(Reps)
        for r in range(Reps):
            Y=MakeHistory(GrRates,betaSpl,n,sfs,Mu)
            wt=Y[3]*Y[4]*Y[5]
            numwt2=ProbOfFSfsTGivenGrowthBetaMu(Y[0],sfs,Y[2],g,betaSpl,Mu,n)
            Lkl2[r]=numwt2/wt
        Lkl2Mean=Lkl2.mean()
        jointLklAcrossLoci=jointLklAcrossLoci*Lkl2Mean
        jointLogLklAcrossLoci=jointLogLklAcrossLoci+log(Lkl2Mean)
    print 'beta, mu, g, lkl, logLkl = ',betaSpl,' ,',Mu,' ,',g,' ,',jointLklAcrossLoci,' ,',jointLogLklAcrossLoci
    return jointLogLklAcrossLoci

def LklOfBetaThetaGrowth(betaSpl,Mu,g,n,sfsList,Reps):
    '''return the log likelihood estimates for beta parameter betaSpl, mutation rate Mu and growth rate g from list of observed S, by first computing a priori mean coalescence rates and then produce Reps particles per locus.'''
    RateSamples=1000  # you may need to increase this for larger n!
    GrRates = GetRatesForGrowth(g,n,RateSamples)
    jointLogLklAcrossLoci = LklOfBetaThetaGrowthRates(betaSpl,Mu,g,GrRates,n,sfsList,Reps)
    return jointLogLklAcrossLoci

def LklOfBetaThetaBottleneckRates(betaSpl,Mu,a,b,eps,N0,btlnckRates,n,sfsList,Reps):
    '''return the lok likelihood across loci for beta, Mu, a, b, eps and N0, specifying a vector btlnckRates of a priori coalescence rates. sfsList is a list of SFS at all loci considered, Reps is the number of particles produced per locus to evaluate the per-locus likelihood.'''
    jointLogLklAcrossLoci=RR(0.0)
    for i in range(len(sfsList)):
        sfs=sfsList[i]
        Lkl2=pylab.zeros(Reps)
        for r in range(Reps):
            Y=MakeHistory(btlnckRates,betaSpl,n,sfs,Mu)
            wt=Y[3]*Y[4]*Y[5]
            numwt2=ProbOfFSfsTGivenBottleneckBetaMu(Y[0],sfs,Y[2],a,b,eps,N0,betaSpl,Mu,n)
            Lkl2[r]=numwt2/wt
        Lkl2Mean=Lkl2.mean()
        jointLogLklAcrossLoci=jointLogLklAcrossLoci+log(Lkl2Mean)
    print 'beta, mu, a, b, eps, N0, logLkl = ',betaSpl,' ,',Mu,' ,',a,' ,',b,' ,',eps,' ,',N0,' ,',jointLogLklAcrossLoci
    return jointLogLklAcrossLoci

def LklOfBetaThetaBottleneck(betaSpl,Mu,a,b,eps,N0,n,sfsList,Reps):
    '''return the log likelihood estimates for beta, Mu and bottleneck parameters a,b,eps,N0 for observed SFS in sfsList, by first computing a priori mean coalescence rates in this scenario. Reps is number of particles produced for each SFS'''
    RateSamples=1000 # you may need to increase this for larger n!
    BtlRates = GetRatesForBottleneck(a,b,eps,N0,n,RateSamples)
    jointLogLklAcrossLoci = LklOfBetaThetaBottleneckRates(betaSpl,Mu,a,b,eps,N0,BtlRates,n,sfsList,Reps)
    return jointLogLklAcrossLoci


###############################################################################
# This Halton sequence code is by Richard P. Muller ([email protected]).
# http://pistol.googlecode.com/svn/trunk/Pistol/QuasiRandom.py
prime = [2.0,3.0,5.0,7.0,11.0,13.0,17.0,19.0,23.0,29.0,31.0,37.0,
         41.0,43.0,47.0,53.0,59.0,61.0,67.0,71.0,73.0,79.0,83.0,
         89.0,97.0,101.0,103.0,107.0,109.0,113.0,127.0,131.0,137.0,
         139.0,149.0,151.0,157.0,163.0,167.0,173.0]
class HaltonSequence:
    """Another algorithm to compute a d-dimensional Halton Sequence.
    This one gives the same results as the other."""
    def __init__(self,dim,atmost=10000):
        if dim > len(prime): raise "dim < %d " % len(prime)
        self.dim = dim

        self.err = 0.9/(atmost*prime[dim-1])

        self.prime = [0]*self.dim
        self.quasi = [0]*self.dim
        for i in range(self.dim):
            self.prime[i] = 1./prime[i]
            self.quasi[i] = self.prime[i]
        return

    def __call__(self):
        for i in range(self.dim):
            t = self.prime[i]
            f = 1.-self.quasi[i]
            g = 1.
            h = t
            while f-h < self.err:
                g = h
                h = h*t
            self.quasi[i] = g+h-f
        return self.quasi
# END of Halton sequence code by Richard P. Muller ([email protected]).
###############################################################################

Computing useful statistics

%auto
def WattersonsThetaEst(s,n):
    '''return Watterson's estimate of mutation rate based on number of segregating sites s and average length of Kingman's coalescent with sample size n'''
    est=0
    for i in range(1,n):
        est=est+(2/i)
    return s/est

def GetRatesForGrowth(g,n,RateSamples):
    '''return the mean estimated rates for each epoch under growth g and sample size n, based on RateSamples samplings from the law'''
    A=KingmanRates(n)
    B=pylab.zeros(n+1)

    for j in range(RateSamples):
        B=B+EpochTimesExpGr(n,A,1,g)
    for j in range(2,n+1):
        B[j]= RateSamples/B[j]
    return B

def GetRatesForBottleneck(a,b,eps,N0,n,RateSamples):
    '''return the mean estimated rates for each epoch under Kingman's coalescent with bottleneck model with start and end times a<b, with initial population size N0 and population reduction eps, based on RateSamples samplings from the law'''
    B=pylab.zeros(n+1)
    for j in range(RateSamples):
        B=B+EpochTimesBtl(n,N0,a,b,eps)
    for j in range(2,n+1):
        B[j]= RateSamples/B[j]
    return B

Producing data to test the algorithms under the exponential growth model

# dataset 1G - KINGMAN'S COALESCENT WITH NO GROWTH

# Produce a list of NumLoci2 SFS from Kingman's coalescent with growth
set_random_seed(12456546546)
np.random.seed(int(78786767))
n=15
N0=1 # this is fixed at unity as in Hudson's ms
TrueBeta=0
TrueG=0.0
TrueTheta=10
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites across loci
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

70.05

# Likelihood based on the full list of SFS
set_random_seed(12456546546)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci2)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,1,10];
ThetaList=[1,10,100];
results=[]
Reps=100; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci2,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 0.000000000000000  , 0.0  , -7667.7511712
-7667.7511712
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 0.000000000000000  , 0.0  , -5226.21481293
-5226.21481293
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 0.000000000000000  , 0.0  , -10086.1480954
-10086.1480954
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 0.000000000000000  , 0.0  , -7599.85131455
-7599.85131455
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 0.000000000000000  , 0.0  , -5110.13049221
-5110.13049221
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 0.000000000000000  , 0.0  , -10196.7979451
-10196.7979451
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 0.000000000000000  , 0.0  , -7643.79941623
-7643.79941623
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 0.000000000000000  , 0.0  , -5324.05597448
-5324.05597448
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 0.000000000000000  , 0.0  , -10321.0669668
-10321.0669668
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 1  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 1  , 0.0  , -5909.20719092
-5909.20719092
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 1  , 0.0  , -10086.4618185
-10086.4618185
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 1  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 1  , 0.0  , -5786.1424593
-5786.1424593
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 1  , 0.0  , -10154.3873678
-10154.3873678
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 1  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 1  , 0.0  , -5853.30748592
-5853.30748592
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 1  , 0.0  , -10255.5327358
-10255.5327358
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 10  , 0.0  , -9975.7902135
-9975.7902135
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 10  , 0.0  , -10014.8543315
-10014.8543315
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 10  , 0.0  , -10230.7874703
-10230.7874703
[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5110.1304922098898]

# Likelihood based on the partial list of SFS
set_random_seed(12456546546)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,1,10];
ThetaList=[1,10,100];
results=[]
Reps=100; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 0.000000000000000  , 0.0  , -2337.02890526
-2337.02890526
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 0.000000000000000  , 0.0  , -1576.56551326
-1576.56551326
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 0.000000000000000  , 0.0  , -3065.09464016
-3065.09464016
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 0.000000000000000  , 0.0  , -2345.4279983
-2345.4279983
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 0.000000000000000  , 0.0  , -1569.62519957
-1569.62519957
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 0.000000000000000  , 0.0  , -3031.26008774
-3031.26008774
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 0.000000000000000  , 0.0  , -2340.03771926
-2340.03771926
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 0.000000000000000  , 0.0  , -1581.81045985
-1581.81045985
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 0.000000000000000  , 0.0  , -3145.99935957
-3145.99935957
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 1  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 1  , 0.0  , -1817.21117217
-1817.21117217
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 1  , 0.0  , -3047.49606705
-3047.49606705
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 1  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 1  , 0.0  , -1732.80522191
-1732.80522191
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 1  , 0.0  , -3036.44416147
-3036.44416147
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 1  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 1  , 0.0  , -1812.15345891
-1812.15345891
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 1  , 0.0  , -3157.4826845
-3157.4826845
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 10  , 0.0  , -3031.21732179
-3031.21732179
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 10  , 0.0  , -3019.13628679
-3019.13628679
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 10  , 0.0  , -inf
-inf
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 10  , 0.0  , -3108.13197622
-3108.13197622
[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[100, 15, 30, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -1569.6251995691184]

# dataset 2G - KINGMAN'S COALESCENT WITH GROWTH - SAMPLE SIZE n=2

set_random_seed(12456546)
np.random.seed(int(787867))
n=2
N0=1
TrueBeta=0
TrueG=10.0
TrueTheta=10
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites across loci
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

3.82

beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 0.000000000000000  , 1.84642497925e-38  , -86.8849822079
-86.8849822079
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 0.000000000000000  , 3.27549135261e-43  , -97.8246911096
-97.8246911096
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 0.000000000000000  , 4.13081400383e-70  , -159.762482027
-159.762482027
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 1  , 7.19903227912e-51  , -115.457893131
-115.457893131
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 1  , 1.45992857523e-40  , -91.7250162063
-91.7250162063
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 1  , 9.10762500924e-70  , -158.971844534
-158.971844534
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 10  , 2.04450578169e-187  , -429.868256301
-429.868256301
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 10  , 7.32408674123e-32  , -71.6915545056
-71.6915545056
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 10  , 1.32896401033e-66  , -151.686216439
-151.686216439
[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[1000, 2, 30, 0, 10.0000000000000, 10, 0.000000000000000, 10, 10, -71.691554505644334]

# dataset 3G - UNBALANCED TREE WITH GROWTH

set_random_seed(124566)
np.random.seed(int(787607))
n=10
N0=1
TrueBeta=-1.9
TrueG=10.0
TrueTheta=10
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
    tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
    TFS.append(tfs)

# mean number of segregating sites across loci
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

14.63

set_random_seed(1245655)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)]
BetaList=[-1.9,0.0,10.0];
GrowthList=[0.0,1,10];
ThetaList=[1,10,100];
results=[]
Reps=500; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

beta, mu, g, lkl, logLkl =  -1.90000000000000  , 1  , 0.000000000000000  , 8.92612469103e-153  , -350.106536893
-350.106536893
beta, mu, g, lkl, logLkl =  -1.90000000000000  , 10  , 0.000000000000000  , 3.72263396154e-213  , -489.136193336
-489.136193336
beta, mu, g, lkl, logLkl =  -1.90000000000000  , 100  , 0.000000000000000  , 0.0  , -1373.7491213
-1373.7491213
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 0.000000000000000  , 1.04931918813e-194  , -446.653366479
-446.653366479
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 0.000000000000000  , 1.41195612133e-283  , -651.286605254
-651.286605254
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 0.000000000000000  , 0.0  , -1618.3152301
-1618.3152301
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 0.000000000000000  , 1.44084065211e-221  , -508.506078822
-508.506078822
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 0.000000000000000  , 0.0  , -746.607157891
-746.607157891
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 0.000000000000000  , 0.0  , -1772.76619681
-1772.76619681
beta, mu, g, lkl, logLkl =  -1.90000000000000  , 1  , 1  , 7.46222329377e-232  , -532.189888176
-532.189888176
beta, mu, g, lkl, logLkl =  -1.90000000000000  , 10  , 1  , 1.28135329712e-208  , -478.68978256
-478.68978256
beta, mu, g, lkl, logLkl =  -1.90000000000000  , 100  , 1  , 0.0  , -1371.19351795
-1371.19351795
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 1  , 2.60833233832e-272  , -625.344434228
-625.344434228
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 1  , 8.00576248154e-286  , -656.459175004
-656.459175004
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 1  , 0.0  , -1620.4503413
-1620.4503413
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 1  , 1.00165324615e-308  , -709.194556761
-709.194556761
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 1  , 0.0  , -749.966447065
-749.966447065
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 1  , 0.0  , -1783.23476919
-1783.23476919
beta, mu, g, lkl, logLkl =  -1.90000000000000  , 1  , 10  , 0.0  , -1618.83362947
-1618.83362947
beta, mu, g, lkl, logLkl =  -1.90000000000000  , 10  , 10  , 1.02978520119e-228  , -524.960050965
-524.960050965
beta, mu, g, lkl, logLkl =  -1.90000000000000  , 100  , 10  , 0.0  , -1346.84482594
-1346.84482594
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 10  , 0.0  , -1740.11270886
-1740.11270886
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 10  , 3.00817303193e-318  , -731.120727431
-731.120727431
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 10  , 0.0  , -1606.96795791
-1606.96795791
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 10  , 0.0  , -1739.04369827
-1739.04369827
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 10  , 0.0  , -826.161229329
-826.161229329
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 10  , 0.0  , -1755.56846088
-1755.56846088
[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[500, 10, 30, -1.90000000000000, 10.0000000000000, 10, -1.90000000000000, 1, 0.000000000000000, -350.10653689256128]

# dataset 4G - MORE BALANCED TREE WITH NO GROWTH

set_random_seed(12456625)
np.random.seed(int(78764507))
n=10
N0=1
TrueBeta=10
TrueG=0.0
TrueTheta=10
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
    tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
    TFS.append(tfs)

# mean number of segregating sites across loci
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

50.86

set_random_seed(1245655)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)]
BetaList=[-1.9,0.0,10,20,50];
GrowthList=[0.0];
ThetaList=[1,10,100];
results=[]
Reps=500; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

beta, mu, g, lkl, logLkl =  -1.90000000000000  , 1  , 0.000000000000000  , 0.0  , -1154.54179629
-1154.54179629
beta, mu, g, lkl, logLkl =  -1.90000000000000  , 10  , 0.000000000000000  , 0.0  , -780.297563836
-780.297563836
beta, mu, g, lkl, logLkl =  -1.90000000000000  , 100  , 0.000000000000000  , 0.0  , -1767.67463989
-1767.67463989
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 0.000000000000000  , 0.0  , -951.140203765
-951.140203765
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 0.000000000000000  , 1.8497040141e-244  , -561.215737057
-561.215737057
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 0.000000000000000  , 0.0  , -1530.84525417
-1530.84525417
beta, mu, g, lkl, logLkl =  10  , 1  , 0.000000000000000  , 0.0  , -899.806101283
-899.806101283
beta, mu, g, lkl, logLkl =  10  , 10  , 0.000000000000000  , 2.09476181715e-241  , -554.183567556
-554.183567556
beta, mu, g, lkl, logLkl =  10  , 100  , 0.000000000000000  , 0.0  , -1511.60217057
-1511.60217057
beta, mu, g, lkl, logLkl =  20  , 1  , 0.000000000000000  , 0.0  , -883.534377758
-883.534377758
beta, mu, g, lkl, logLkl =  20  , 10  , 0.000000000000000  , 3.78782789921e-241  , -553.59121467
-553.59121467
beta, mu, g, lkl, logLkl =  20  , 100  , 0.000000000000000  , 0.0  , -1535.52126976
-1535.52126976
beta, mu, g, lkl, logLkl =  50  , 1  , 0.000000000000000  , 0.0  , -898.52707297
-898.52707297
beta, mu, g, lkl, logLkl =  50  , 10  , 0.000000000000000  , 2.07326978324e-237  , -544.983540073
-544.983540073
beta, mu, g, lkl, logLkl =  50  , 100  , 0.000000000000000  , 0.0  , -1507.09522843
-1507.09522843
[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[500, 10, 30, 10, 0.000000000000000, 10, 50, 10, 0.000000000000000, -544.98354007314447]

# dataset 4Gbis - SAME WITH LARGER BETA

set_random_seed(12456625)
np.random.seed(int(78764507))
n=10
N0=1
TrueBeta=50
TrueG=0.0
TrueTheta=10
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
    tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
    TFS.append(tfs)

# mean number of segregating sites across loci
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

54.55

set_random_seed(1245655)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)]
BetaList=[-1.9,0.0,20,50,70,100];
GrowthList=[0.0];
ThetaList=[10];
results=[]
Reps=500; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

beta, mu, g, lkl, logLkl =  -1.90000000000000  , 10  , 0.000000000000000  , 0.0  , -779.448068357
-779.448068357
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 0.000000000000000  , 1.15508278542e-247  , -568.594345952
-568.594345952
beta, mu, g, lkl, logLkl =  20  , 10  , 0.000000000000000  , 1.44022837574e-232  , -533.834939879
-533.834939879
beta, mu, g, lkl, logLkl =  50  , 10  , 0.000000000000000  , 4.92456037301e-226  , -518.78999601
-518.78999601
beta, mu, g, lkl, logLkl =  70  , 10  , 0.000000000000000  , 9.84020154202e-222  , -508.887414452
-508.887414452
beta, mu, g, lkl, logLkl =  100  , 10  , 0.000000000000000  , 5.46929618175e-225  , -516.382495984
-516.382495984
[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[500, 10, 30, 50, 0.000000000000000, 10, 70, 10, 0.000000000000000, -508.88741445191135]

# same computations, with more loci (100 instead of 30)
set_random_seed(1245655)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci2)]
BetaList=[-1.9,0.0,20,50,70,100];
GrowthList=[0.0];
ThetaList=[10];
results=[]
Reps=500; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci2,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

beta, mu, g, lkl, logLkl =  -1.90000000000000  , 10  , 0.000000000000000  , 0.0  , -2678.13043644
-2678.13043644
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 0.000000000000000  , 0.0  , -1906.68164481
-1906.68164481
beta, mu, g, lkl, logLkl =  20  , 10  , 0.000000000000000  , 0.0  , -1771.44766253
-1771.44766253
beta, mu, g, lkl, logLkl =  50  , 10  , 0.000000000000000  , 0.0  , -1771.74216851
-1771.74216851
beta, mu, g, lkl, logLkl =  70  , 10  , 0.000000000000000  , 0.0  , -1759.10585603
-1759.10585603
beta, mu, g, lkl, logLkl =  100  , 10  , 0.000000000000000  , 0.0  , -1798.32218429
-1798.32218429
[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[500, 10, 100, 50, 0.000000000000000, 10, 70, 10, 0.000000000000000, -1759.1058560291142]

# dataset 5G - LESS EXTREME DEPARTURES FROM Beta=0
set_random_seed(12456546546)
np.random.seed(int(78786767))
n=15
N0=1
TrueBeta=-1
TrueG=0.0
TrueTheta=10
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)

# infer
set_random_seed(12456546546)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,10.0,100.0];
ThetaList=[1,10,100];
results=[]
Reps=20; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])

# report the most likely values
print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 0.000000000000000  , 0.0  , -2485.44180413698
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 0.000000000000000  , 0.0  , -1595.65973228989
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 0.000000000000000  , 0.0  , -3009.74146339138
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 0.000000000000000  , 0.0  , -2497.49538365434
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 0.000000000000000  , 0.0  , -1629.09844522608
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 0.000000000000000  , 0.0  , -3054.63957052542
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 0.000000000000000  , 0.0  , -2524.46093727679
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 0.000000000000000  , 0.0  , -1721.68556524245
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 0.000000000000000  , 0.0  , -3233.84204749802
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 10.0000000000000  , 0.0  , -3029.81399220508
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 10.0000000000000  , 0.0  , -3063.60307631410
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 10.0000000000000  , 0.0  , -3196.15624867809
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 100.000000000000  , 0.0  , -infinity
[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[20, 15, 30, -1, 0.000000000000000, 10, -1.00000000000000, 10, 0.000000000000000, -1595.65973228989]

# dataset 6G - SAME WITH MORE BALANCED TREES

set_random_seed(12456546546)
np.random.seed(int(78786767))
n=15
N0=1
TrueBeta=10.0
TrueG=0.0
TrueTheta=10
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)

# infer
set_random_seed(12456546546)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,10.0,100.0];
ThetaList=[1,10,100];
results=[]
Reps=20; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])

# report the most likely values
print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 0.000000000000000  , 0.0  , -2426.00860033181
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 0.000000000000000  , 0.0  , -1807.30350551266
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 0.000000000000000  , 0.0  , -3353.27443791154
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 0.000000000000000  , 0.0  , -2386.08740026739
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 0.000000000000000  , 0.0  , -1764.68461686015
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 0.000000000000000  , 0.0  , -3296.49162836633
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 0.000000000000000  , 0.0  , -2318.48151263816
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 0.000000000000000  , 0.0  , -1651.92027054266
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 0.000000000000000  , 0.0  , -3295.30336629113
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 10.0000000000000  , 0.0  , -3334.17111890078
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 10.0000000000000  , 0.0  , -3336.53844782303
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 10.0000000000000  , 0.0  , -3275.05019894113
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10  , 100.000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100  , 100.000000000000  , 0.0  , -infinity
[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[20, 15, 30, 10.0000000000000, 0.000000000000000, 10, 10.0000000000000, 10, 0.000000000000000, -1651.92027054266]

# dataset 6G' - SAME WITH slightly MORE BALANCED TREES but with large theta
# this is to check against the best fitted beta and theta in the beta-splitting model fitted against the complex ms simulation from msdoc below

set_random_seed(12456546546)
np.random.seed(int(78786767))
n=15
N0=1
TrueBeta=5.0
TrueG=0.0
TrueTheta=100
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)

# infer
set_random_seed(12456546546)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[-1.0,0.0,5.0];
GrowthList=[0.0];
ThetaList=[10,100,1000];
results=[]
Reps=20; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])

# report the most likely values
print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10  , 0.000000000000000  , 0.0  , -5347.9254917
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100  , 0.000000000000000  , 0.0  , -4181.90160246
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1000  , 0.000000000000000  , 0.0  , -7318.4541281
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10  , 0.000000000000000  , 0.0  , -4506.96656944
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100  , 0.000000000000000  , 0.0  , -3629.03506764
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1000  , 0.000000000000000  , 0.0  , -6476.40522229
beta, mu, g, lkl, logLkl =  5.00000000000000  , 10  , 0.000000000000000  , 0.0  , -4211.11864562
beta, mu, g, lkl, logLkl =  5.00000000000000  , 100  , 0.000000000000000  , 0.0  , -3316.56095862
beta, mu, g, lkl, logLkl =  5.00000000000000  , 1000  , 0.000000000000000  , 0.0  , -5760.49369151
[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[20, 15, 30, 5.00000000000000, 0.000000000000000, 100, 5.00000000000000, 100, 0.000000000000000, -3316.5609586219052]

Sanity check: increased mutation rate (i.e., more SNPs) gives better estimates

# dataset 7G with g=2 and increased theta
set_random_seed(12533)
np.random.seed(np.int(1245))
n=15
N0=1
TrueBeta=0.0
TrueG=2.0
TrueTheta=100.0
NumLoci=60

TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)
 #tfs[2]

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

364.35

set_random_seed(12451116)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,1.0,2.0,3.0,5.0,10.0];
ThetaList=[100.0];
results=[]
Reps=200; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])

beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 0.000000000000000  , 0.0  , -4907.78167671551
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 0.000000000000000  , 0.0  , -4770.02044249150
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 0.000000000000000  , 0.0  , -4733.57552978068
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 1.00000000000000  , 0.0  , -4858.67216675298
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 1.00000000000000  , 0.0  , -4758.11451797914
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 1.00000000000000  , 0.0  , -4754.97193836299
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 2.00000000000000  , 0.0  , -4938.48773788207
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 2.00000000000000  , 0.0  , -4692.51974709802
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 2.00000000000000  , 0.0  , -4719.45055633706
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 3.00000000000000  , 0.0  , -5014.20031830045
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 3.00000000000000  , 0.0  , -4846.31369921827
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 3.00000000000000  , 0.0  , -4836.91856516081
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 5.00000000000000  , 0.0  , -5949.63834428161
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 5.00000000000000  , 0.0  , -5767.59346562225
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 5.00000000000000  , 0.0  , -5728.97568897941
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 10.0000000000000  , 0.0  , -13897.3116586207
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 10.0000000000000  , 0.0  , -13681.8415389646
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 10.0000000000000  , 0.0  , -infinity

# report the most likely values
print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[200, 15, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 2.00000000000000, -4692.51974709802]

# dataset 8G - with increased n
set_random_seed(122323533)
np.random.seed(np.int(1211145))
n=30
N0=1
TrueBeta=0.0
TrueG=2.0
TrueTheta=100.0
NumLoci=60

TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)
 #tfs[2]

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

462.516666667

set_random_seed(12451116) # we can find signal of growth and beta over theta (gHat=1.0 instead of trueG=2.0)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,1.0,2.0,3.0,5.0,10.0];
ThetaList=[1.0,10.0,100.0,1000.0];
results=[]
Reps=200; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,5000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
results

beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1.00000000000000  , 0.000000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10.0000000000000  , 0.000000000000000  , 0.0  , -16422.3009309419
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 0.000000000000000  , 0.0  , -16028.4737429655
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1000.00000000000  , 0.000000000000000  , 0.0  , -26795.5988144158
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1.00000000000000  , 0.000000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10.0000000000000  , 0.000000000000000  , 0.0  , -16128.6146952371
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 0.000000000000000  , 0.0  , -15662.5259045307
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1000.00000000000  , 0.000000000000000  , 0.0  , -26254.8450548033
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1.00000000000000  , 0.000000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10.0000000000000  , 0.000000000000000  , 0.0  , -16153.5678644303
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 0.000000000000000  , 0.0  , -15850.5218021710
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1000.00000000000  , 0.000000000000000  , 0.0  , -26538.9522004612
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1.00000000000000  , 1.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10.0000000000000  , 1.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 1.00000000000000  , 0.0  , -15961.8514298296
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1000.00000000000  , 1.00000000000000  , 0.0  , -27117.3003394868
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1.00000000000000  , 1.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10.0000000000000  , 1.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 1.00000000000000  , 0.0  , -15537.5894574872
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1000.00000000000  , 1.00000000000000  , 0.0  , -26370.1676322233
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1.00000000000000  , 1.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10.0000000000000  , 1.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 1.00000000000000  , 0.0  , -15705.6247847785
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1000.00000000000  , 1.00000000000000  , 0.0  , -26446.3182949772
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1.00000000000000  , 2.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10.0000000000000  , 2.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 2.00000000000000  , 0.0  , -16152.0144288948
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1000.00000000000  , 2.00000000000000  , 0.0  , -26887.1974399624
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1.00000000000000  , 2.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10.0000000000000  , 2.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 2.00000000000000  , 0.0  , -15768.9988127428
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1000.00000000000  , 2.00000000000000  , 0.0  , -26070.4776896200
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1.00000000000000  , 2.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10.0000000000000  , 2.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 2.00000000000000  , 0.0  , -15698.4926490336
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1000.00000000000  , 2.00000000000000  , 0.0  , -26270.5375180653
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1.00000000000000  , 3.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10.0000000000000  , 3.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 3.00000000000000  , 0.0  , -16327.6260523775
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1000.00000000000  , 3.00000000000000  , 0.0  , -26921.0641552768
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1.00000000000000  , 3.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10.0000000000000  , 3.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 3.00000000000000  , 0.0  , -15834.3765202690
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1000.00000000000  , 3.00000000000000  , 0.0  , -26239.3022865899
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1.00000000000000  , 3.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10.0000000000000  , 3.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 3.00000000000000  , 0.0  , -15929.4729365962
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1000.00000000000  , 3.00000000000000  , 0.0  , -26456.7767878908
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1.00000000000000  , 5.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10.0000000000000  , 5.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 5.00000000000000  , 0.0  , -17140.0109560034
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1000.00000000000  , 5.00000000000000  , 0.0  , -26809.5379888034
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1.00000000000000  , 5.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10.0000000000000  , 5.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 5.00000000000000  , 0.0  , -16710.2692409932
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1000.00000000000  , 5.00000000000000  , 0.0  , -26264.8777998098
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1.00000000000000  , 5.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10.0000000000000  , 5.00000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 5.00000000000000  , 0.0  , -17005.1235072424
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1000.00000000000  , 5.00000000000000  , 0.0  , -26267.6976961831
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1.00000000000000  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 10.0000000000000  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 100.000000000000  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  -1.00000000000000  , 1000.00000000000  , 10.0000000000000  , 0.0  , -26533.4086149551
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1.00000000000000  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 10.0000000000000  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 100.000000000000  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  0.000000000000000  , 1000.00000000000  , 10.0000000000000  , 0.0  , -26197.9966739432
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1.00000000000000  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 10.0000000000000  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 100.000000000000  , 10.0000000000000  , 0.0  , -infinity
beta, mu, g, lkl, logLkl =  10.0000000000000  , 1000.00000000000  , 10.0000000000000  , 0.0  , -26294.5184967989
[[200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 0.000000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 0.000000000000000, -16422.3009309419], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 0.000000000000000, -16028.4737429655], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 0.000000000000000, -26795.5988144158], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 0.000000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 0.000000000000000, -16128.6146952371], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 0.000000000000000, -15662.5259045307], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 0.000000000000000, -26254.8450548033], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 0.000000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 0.000000000000000, -16153.5678644303], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 0.000000000000000, -15850.5218021710], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 0.000000000000000, -26538.9522004612], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 1.00000000000000, -15961.8514298296], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 1.00000000000000, -27117.3003394868], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 1.00000000000000, -15537.5894574872], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 1.00000000000000, -26370.1676322233], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 1.00000000000000, -15705.6247847785], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 1.00000000000000, -26446.3182949772], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 2.00000000000000, -16152.0144288948], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 2.00000000000000, -26887.1974399624], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 2.00000000000000, -15768.9988127428], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 2.00000000000000, -26070.4776896200], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 2.00000000000000, -15698.4926490336], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 2.00000000000000, -26270.5375180653], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 3.00000000000000, -16327.6260523775], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 3.00000000000000, -26921.0641552768], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 3.00000000000000, -15834.3765202690], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 3.00000000000000, -26239.3022865899], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 3.00000000000000, -15929.4729365962], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 3.00000000000000, -26456.7767878908], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 5.00000000000000, -17140.0109560034], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 5.00000000000000, -26809.5379888034], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 5.00000000000000, -16710.2692409932], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 5.00000000000000, -26264.8777998098], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 5.00000000000000, -17005.1235072424], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 5.00000000000000, -26267.6976961831], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 10.0000000000000, -26533.4086149551], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 10.0000000000000, -26197.9966739432], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 10.0000000000000, -26294.5184967989]]

# report the most likely values
print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 1.00000000000000, -15537.5894574872]

Producing data to test the algorithms under bottleneck model

# dataset B1 - RECENT MILD BOTTLENECK - SMALL SAMPLE SIZE
set_random_seed(656005)
np.random.seed(int(10043))
n=3
N0=1
TrueBeta=0
TrueA=0.05
TrueGap=0.1
TrueB=TrueA+TrueGap
TrueEps=0.01
TrueTheta=50
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

# inference
set_random_seed(12456546)
np.random.seed(int(12345))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci2)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10];
ThetaList=[TrueTheta];
results=[]
Reps=500; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci2,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])

                # report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][13]>results[MaxI][13]:
       MaxI=i
results[MaxI]

OUTPUT:
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.00100000000000000  , 1  , -3608.48793736
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.0100000000000000  , 1  , -1141.76086703
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.100000000000000  , 1  , -581.719746685
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 1.00000000000000  , 1  , -762.958933525
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 10  , 1  , -997.965051971
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.00100000000000000  , 1  , -1502.2515838
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.0100000000000000  , 1  , -679.440596264
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.100000000000000  , 1  , -530.748263574
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 1.00000000000000  , 1  , -770.327024986
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 10  , 1  , -967.593805413
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.00100000000000000  , 1  , -766.899089643
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.0100000000000000  , 1  , -760.754601725
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.100000000000000  , 1  , -765.649526704
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 1.00000000000000  , 1  , -781.649732233
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 10  , 1  , -769.549791934
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.00100000000000000  , 1  , -773.433328349
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.0100000000000000  , 1  , -773.235962739
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.100000000000000  , 1  , -773.176668532
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 1.00000000000000  , 1  , -767.145018239
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 10  , 1  , -774.944687062
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.00100000000000000  , 1  , -781.470446754
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.0100000000000000  , 1  , -774.247268458
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.100000000000000  , 1  , -754.533162385
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 1.00000000000000  , 1  , -780.409887946
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 10  , 1  , -774.066273148

The ML parameters are:
beta, mu, a, b, eps, N0, logLkl =  0.0, 50, 0.05, 0.15, 0.10, 1 , -530.748263574
true parameters are             =  0.0, 50, 0.05, 0.15, 0.01, 1     

so eps is over estimated.

# inference with less loci but more particles
set_random_seed(12456546)
np.random.seed(int(12345))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10];
ThetaList=[TrueTheta];
results=[]
Reps=1000; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci1,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])

beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.00100000000000000  , 1  , -1163.51882255
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.0100000000000000  , 1  , -365.696350645
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.100000000000000  , 1  , -173.175337965
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.00100000000000000  , 1.00000000000000  , 1  , -227.303776888
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 1  , -431.531782353
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.0100000000000000  , 1  , -193.513111235
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.100000000000000  , 1  , -152.052859489
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 1.00000000000000  , 1  , -225.725399602
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 10  , 1  , -287.570401128
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.00100000000000000  , 1  , -229.547239398
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.0100000000000000  , 1  , -229.415438751
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.100000000000000  , 1  , -224.735694798
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 1.00000000000000  , 1  , -229.699559894
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 10  , 1  , -224.716661467
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.00100000000000000  , 1  , -226.39264498
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.0100000000000000  , 1  , -225.275990284
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.100000000000000  , 1  , -231.04895718
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 1.00000000000000  , 1  , -221.720299295
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 10  , 1  , -225.739136262
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.00100000000000000  , 1  , -226.219278671
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.0100000000000000  , 1  , -226.809838563
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.100000000000000  , 1  , -228.359573602
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 1.00000000000000  , 1  , -218.018749341
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 10  , 1  , -229.290564857

# dataset B2 - RECENT MILD BOTTLENECK - LARGER SAMPLE SIZE
set_random_seed(656005)
np.random.seed(int(10043))
n=20
N0=1
TrueBeta=0
TrueA=0.05
TrueGap=0.1
TrueB=TrueA+TrueGap
TrueEps=0.01
TrueTheta=50
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

#inference
set_random_seed(124546)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=500; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])

47.8333333333
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.00100000000000000  , 1  , -inf
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.0100000000000000  , 1  , -10891.5750864
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.100000000000000  , 1  , -4825.7295686
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 1.00000000000000  , 1  , -3846.83371819
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 10  , 1  , -4703.34086634
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.00100000000000000  , 1  , -7525.41896187
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.0100000000000000  , 1  , -5415.94989691
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.100000000000000  , 1  , -4043.44802175
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 1.00000000000000  , 1  , -3842.49433871
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 10  , 1  , -4268.33273154
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.00100000000000000  , 1  , -3846.97192437
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.0100000000000000  , 1  , -3865.06924946
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.100000000000000  , 1  , -3887.6771275
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 1.00000000000000  , 1  , -3814.16498995
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 10  , 1  , -3843.67431366
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.00100000000000000  , 1  , -3871.95893156
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.0100000000000000  , 1  , -3817.47356273
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.100000000000000  , 1  , -3879.2569377
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 1.00000000000000  , 1  , -3874.41545538
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 10  , 1  , -3859.62890493
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.00100000000000000  , 1  , -3834.15887986
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.0100000000000000  , 1  , -3895.44941551
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.100000000000000  , 1  , -3841.71901123
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 1.00000000000000  , 1  , -3827.64421938
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 10  , 1  , -3864.20955157

# dataset B3 - RECENT MILD BOTTLENECK -SMALL SAMPLE SIZE - HIGHER MUTATION RATE
set_random_seed(656005)
np.random.seed(int(10043))
n=10
N0=1
TrueBeta=0
TrueA=0.05
TrueGap=0.1
TrueB=TrueA+TrueGap
TrueEps=0.01
TrueTheta=150
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

#inference
set_random_seed(124546)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=500; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])

100.633333333
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.00500000000000000  , 0.105000000000000  , 0.00100000000000000  , 1  , -inf
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.00500000000000000  , 0.105000000000000  , 0.0100000000000000  , 1  , -4143.05110455
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.00500000000000000  , 0.105000000000000  , 0.100000000000000  , 1  , -1565.313949
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.00500000000000000  , 0.105000000000000  , 1.00000000000000  , 1  , -1521.12915228
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.00500000000000000  , 0.105000000000000  , 10  , 1  , -1996.62453825
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.0500000000000000  , 0.150000000000000  , 0.00100000000000000  , 1  , -inf
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.0500000000000000  , 0.150000000000000  , 0.0100000000000000  , 1  , -1931.59287118
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.0500000000000000  , 0.150000000000000  , 0.100000000000000  , 1  , -1272.43522785
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.0500000000000000  , 0.150000000000000  , 1.00000000000000  , 1  , -1537.94575198
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.0500000000000000  , 0.150000000000000  , 10  , 1  , -1870.70083238
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.500000000000000  , 0.600000000000000  , 0.00100000000000000  , 1  , -1525.7970105
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.500000000000000  , 0.600000000000000  , 0.0100000000000000  , 1  , -1516.00736295
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.500000000000000  , 0.600000000000000  , 0.100000000000000  , 1  , -1532.82907382
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.500000000000000  , 0.600000000000000  , 1.00000000000000  , 1  , -1524.09678295
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 0.500000000000000  , 0.600000000000000  , 10  , 1  , -1508.19143274
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 1.00000000000000  , 1.10000000000000  , 0.00100000000000000  , 1  , -1516.78782758
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 1.00000000000000  , 1.10000000000000  , 0.0100000000000000  , 1  , -1504.09091572
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 1.00000000000000  , 1.10000000000000  , 0.100000000000000  , 1  , -1535.60390314
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 1.00000000000000  , 1.10000000000000  , 1.00000000000000  , 1  , -1510.11801896
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 1.00000000000000  , 1.10000000000000  , 10  , 1  , -1521.08752307
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 5.00000000000000  , 5.10000000000000  , 0.00100000000000000  , 1  , -1521.71754983
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 5.00000000000000  , 5.10000000000000  , 0.0100000000000000  , 1  , -1514.28750067
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 5.00000000000000  , 5.10000000000000  , 0.100000000000000  , 1  , -1531.01126246
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 5.00000000000000  , 5.10000000000000  , 1.00000000000000  , 1  , -1505.00522372
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 150  , 5.00000000000000  , 5.10000000000000  , 10  , 1  , -1533.90259185

# dataset B3bis - RECENT MILD BOTTLENECK -SMALL SAMPLE SIZE - EVEN HIGHER MUTATION RATE
set_random_seed(656005)
np.random.seed(int(10043))
n=10
N0=1
TrueBeta=0
TrueA=0.05
TrueGap=0.1
TrueB=TrueA+TrueGap
TrueEps=0.01
TrueTheta=500
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

#inference
set_random_seed(124546)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=500; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])

367.7
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.00500000000000000  , 0.105000000000000  , 0.00100000000000000  , 1  , -inf
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.00500000000000000  , 0.105000000000000  , 0.0100000000000000  , 1  , -6259.71012625
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.00500000000000000  , 0.105000000000000  , 0.100000000000000  , 1  , -2004.72334607
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.00500000000000000  , 0.105000000000000  , 1.00000000000000  , 1  , -1901.35655085
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.00500000000000000  , 0.105000000000000  , 10  , 1  , -2394.39501464
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.0500000000000000  , 0.150000000000000  , 0.00100000000000000  , 1  , -inf
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.0500000000000000  , 0.150000000000000  , 0.0100000000000000  , 1  , -2854.14393912
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.0500000000000000  , 0.150000000000000  , 0.100000000000000  , 1  , -1638.08135101
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.0500000000000000  , 0.150000000000000  , 1.00000000000000  , 1  , -1900.43610638
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.0500000000000000  , 0.150000000000000  , 10  , 1  , -2352.27307662
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.500000000000000  , 0.600000000000000  , 0.00100000000000000  , 1  , -1889.43841676
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.500000000000000  , 0.600000000000000  , 0.0100000000000000  , 1  , -1887.05300588
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.500000000000000  , 0.600000000000000  , 0.100000000000000  , 1  , -1886.20850772
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.500000000000000  , 0.600000000000000  , 1.00000000000000  , 1  , -1920.82108268
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 0.500000000000000  , 0.600000000000000  , 10  , 1  , -1916.17760265
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 1.00000000000000  , 1.10000000000000  , 0.00100000000000000  , 1  , -1901.56034954
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 1.00000000000000  , 1.10000000000000  , 0.0100000000000000  , 1  , -1903.36337373
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 1.00000000000000  , 1.10000000000000  , 0.100000000000000  , 1  , -1892.45424539
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 1.00000000000000  , 1.10000000000000  , 1.00000000000000  , 1  , -1903.58549086
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 1.00000000000000  , 1.10000000000000  , 10  , 1  , -1875.72009448
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 5.00000000000000  , 5.10000000000000  , 0.00100000000000000  , 1  , -1904.70426324
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 5.00000000000000  , 5.10000000000000  , 0.0100000000000000  , 1  , -1883.57480008
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 5.00000000000000  , 5.10000000000000  , 0.100000000000000  , 1  , -1908.90330908
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 5.00000000000000  , 5.10000000000000  , 1.00000000000000  , 1  , -1883.73005132
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 500  , 5.00000000000000  , 5.10000000000000  , 10  , 1  , -1918.55885588

With small mutation rates

# dataset B4 - RECENT MILD BOTTLENECK WITH LOW MUTATION RATE - SAMPLE SIZE 3, 1000 LOCI
set_random_seed(656005)
np.random.seed(int(10043))
n=3
N0=1 # this is fixed at unity as in Hudson's ms
TrueBeta=0
TrueA=0.05
TrueGap=0.1
TrueB=TrueA+TrueGap
TrueEps=0.01
TrueTheta=5
NumLoci=1000
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

# inference
set_random_seed(12456546)
np.random.seed(int(12345))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=200; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])

1.284
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 0.00100000000000000  , 1  , -8199.16658344159
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 0.0100000000000000  , 1  , -4845.14168480036
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 0.100000000000000  , 1  , -2003.64279794536
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 1.00000000000000  , 1  , -3325.75934805400
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 10  , 1  , -4173.86472893806
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 0.00100000000000000  , 1  , -5531.93157332962
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 0.0100000000000000  , 1  , -3343.26549075243
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 0.100000000000000  , 1  , -1990.78719179484
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 1.00000000000000  , 1  , -3324.88864932211
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 10  , 1  , -4026.85032236171
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 0.00100000000000000  , 1  , -3442.50066084301
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 0.0100000000000000  , 1  , -3356.50634363227
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 0.100000000000000  , 1  , -3233.20706100601
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 1.00000000000000  , 1  , -3336.05667148710
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 10  , 1  , -3337.73846438619
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 0.00100000000000000  , 1  , -3415.27071905189
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 0.0100000000000000  , 1  , -3376.21020027055
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 0.100000000000000  , 1  , -3315.83668731960
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 1.00000000000000  , 1  , -3348.51498307126
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 10  , 1  , -3330.94713321818
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 0.00100000000000000  , 1  , -3335.65005101939
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 0.0100000000000000  , 1  , -3309.76392134207
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 0.100000000000000  , 1  , -3328.76412119589
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 1.00000000000000  , 1  , -3350.04170366840
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 10  , 1  , -3323.10652571638

We don't detect non-existant bottlenecks

# dataset B5 - NO BOTTLENECKS - SAMPLE SIZE 5, 100 LOCI
set_random_seed(656005)
np.random.seed(int(10043))
n=5
N0=1 # this is fixed at unity as in Hudson's ms
TrueBeta=0
TrueA=0.05       #dummy
TrueGap=0.1      #dummy
TrueB=TrueA+TrueGap
TrueEps=1.0    #no reduction in pop size
TrueTheta=5
NumLoci=100
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

#inference
set_random_seed(12456546)
np.random.seed(int(12345))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=1000; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])

19.71
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 0.00100000000000000  , 1  , -14893.7528941
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 0.0100000000000000  , 1  , -5307.23388516
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 0.100000000000000  , 1  , -1017.54379745
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 1.00000000000000  , 1  , -377.626413346
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 10  , 1  , -371.38288873
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 0.00100000000000000  , 1  , -23592.9313808
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 0.0100000000000000  , 1  , -4618.22388074
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 0.100000000000000  , 1  , -833.920280682
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 1.00000000000000  , 1  , -366.136998204
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 10  , 1  , -368.620578748
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 0.00100000000000000  , 1  , -5251.96210255
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 0.0100000000000000  , 1  , -1152.24064874
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 0.100000000000000  , 1  , -469.945393142
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 1.00000000000000  , 1  , -378.677354715
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 10  , 1  , -374.894630666
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 0.00100000000000000  , 1  , -2053.68035855
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 0.0100000000000000  , 1  , -698.33914174
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 0.100000000000000  , 1  , -405.69827546
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 1.00000000000000  , 1  , -358.672249758
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 10  , 1  , -385.701546557
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 0.00100000000000000  , 1  , -377.856904102
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 0.0100000000000000  , 1  , -383.864605249
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 0.100000000000000  , 1  , -370.258771678
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 1.00000000000000  , 1  , -353.622678588
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 10  , 1  , -372.561354959

# dataset B6 - NO BOTTLENECKS - SAMPLE SIZE 10, 30 LOCI
set_random_seed(656)
np.random.seed(int(143))
n=10
N0=1 # this is fixed at unity as in Hudson's ms
TrueBeta=0
TrueA=0.05       #dummy
TrueGap=0.1      #dummy
TrueB=TrueA+TrueGap
TrueEps=1.0    #no reduction in pop size
TrueTheta=5
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

#inference
set_random_seed(12456)
np.random.seed(int(15))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=1000; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])

30.0333333333
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 0.00100000000000000  , 1  , -inf
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 0.0100000000000000  , 1  , -4020.72436877
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 0.100000000000000  , 1  , -1294.12377314
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 1.00000000000000  , 1  , -463.904646526
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.00500000000000000  , 0.105000000000000  , 10  , 1  , -435.304203016
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 0.00100000000000000  , 1  , -inf
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 0.0100000000000000  , 1  , -4286.62612305
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 0.100000000000000  , 1  , -1046.51737715
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 1.00000000000000  , 1  , -455.09475188
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.0500000000000000  , 0.150000000000000  , 10  , 1  , -423.612616743
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 0.00100000000000000  , 1  , -2541.73302458
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 0.0100000000000000  , 1  , -936.115534231
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 0.100000000000000  , 1  , -544.935290976
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 1.00000000000000  , 1  , -452.606864125
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 0.500000000000000  , 0.600000000000000  , 10  , 1  , -483.952790351
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 0.00100000000000000  , 1  , -1092.86592868
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 0.0100000000000000  , 1  , -659.687709815
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 0.100000000000000  , 1  , -475.99086912
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 1.00000000000000  , 1  , -438.91106984
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 1.00000000000000  , 1.10000000000000  , 10  , 1  , -465.69934625
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 0.00100000000000000  , 1  , -461.106491242
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 0.0100000000000000  , 1  , -458.676482539
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 0.100000000000000  , 1  , -453.921258108
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 1.00000000000000  , 1  , -461.540988399
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 5  , 5.00000000000000  , 5.10000000000000  , 10  , 1  , -430.72283051

Simulating Complex Demographical and Structural Scenarios for $\beta$ -Effective Population Size $(\beta, N_e)$

The next cell is just testing if ms is already installed.

If not run the second cell below which will be downloading and compiling a slightly augmented version of Hudson's ms (just for extracting SFS from the the binary incidence matrices).

There is no need to run the second cell below is ms is already installed in /home/user/msdir/ms/.

1

%sh
ms 5 2 -T

ms 5 2 -T 
48467 38575 3753

//
(5:1.335576295853,(3:0.165384739637,(2:0.131984353065,(1:0.024002499878,4:0.024002499878):0.107981853187):0.033400386572):1.170191526413);

//
((1:0.169807076454,3:0.169807076454):0.590738654137,(4:0.208807185292,(2:0.181732580066,5:0.181732580066):0.027074605227):0.551738560200);

%sh
rm -rf /home/user/msdir &&
cd /home/user &&
wget https://github.com/lamastex/lce/raw/master/msSimulations/msdir.tar.gz &&
tar zxvf msdir.tar.gz &&
cd msdir &&
gcc -O3 -o ms ms.c streec.c rand1.c -lm &&
gcc -o sample_statsSFS sample_statsSFS.c tajd.c -lm
export PATH="/home/user/msdir:$PATH"
ls -al

--2018-02-12 21:38:14--  https://github.com/lamastex/lce/raw/master/msSimulations/msdir.tar.gz
Resolving github.com (github.com)... 192.30.253.112, 192.30.253.113
Connecting to github.com (github.com)|192.30.253.112|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/lamastex/lce/master/msSimulations/msdir.tar.gz [following]
--2018-02-12 21:38:15--  https://raw.githubusercontent.com/lamastex/lce/master/msSimulations/msdir.tar.gz
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 402688 (393K) [application/octet-stream]
Saving to: ‘msdir.tar.gz.6’

msdir.tar.gz.6      100%[===================>] 393.25K  --.-KB/s    in 0.004s  

2018-02-12 21:38:15 (86.8 MB/s) - ‘msdir.tar.gz.6’ saved [402688/402688]

msdir/
msdir/._rand1.c
msdir/.DS_Store
msdir/streec.c
msdir/params
msdir/rand1t.c
msdir/R_example_1/
msdir/R_example_1/thetabayesfig.pdf
msdir/R_example_1/._thetabayesfig.pdf
msdir/R_example_1/thetabayes.R
msdir/R_example_1/._thetabayes.R
msdir/._microsat.c
msdir/out
msdir/._msdoc.pdf
msdir/tst
msdir/._R_example_3
msdir/._streec.c
msdir/._R_example_1
msdir/sample_statsSFS.c
msdir/._readms.output.R
msdir/R_example_2/
msdir/R_example_2/._demog_infer_simple.R
msdir/R_example_2/._demoginfersimplefig.pdf
msdir/R_example_2/demoginfersimplefig.pdf
msdir/R_example_2/demog_infer_simple.R
msdir/._clms
msdir/._stats.c
msdir/seedms
msdir/readms.output.R
msdir/._sample_stats.c
msdir/ms.c
msdir/time2epoch.py
msdir/._seedms
msdir/ort
msdir/rand2t.c
msdir/._params
msdir/microsat.c
msdir/msdoc.pdf
msdir/rand2.c
msdir/tst2
msdir/._col1
msdir/._readme
msdir/readme
msdir/ms.h
msdir/sample_stats.c
msdir/tajd.c
msdir/._rand2t.c
msdir/._rand2.c
msdir/._migmat
msdir/._tajd.c
msdir/._.DS_Store
msdir/._ms.h
msdir/dist3.c
msdir/stats.c
msdir/._dist3.c
msdir/._ms.c
msdir/R_example_3/
msdir/R_example_3/demog_infer2d.R
msdir/R_example_3/demoginfer2dfig2.pdf
msdir/R_example_3/._demoginfer2dfig2.pdf
msdir/R_example_3/._demog_infer2d.R
msdir/._rand1t.c
msdir/migmat
msdir/clms
msdir/R_example_4/
msdir/R_example_4/._demoginfermcmcfig1.pdf
msdir/R_example_4/demog_infer_mcmc.R
msdir/R_example_4/demoginfermcmcfig2.pdf
msdir/R_example_4/demoginfermcmcfig1.pdf
msdir/R_example_4/._demoginfermcmcfig2.pdf
msdir/R_example_4/._demog_infer_mcmc.R
msdir/col1
msdir/._R_example_2
msdir/rand1.c
msdir/ms.out
msdir/ms
msdir/sample_statsSFS
msdir/._R_example_4
msdir/tst2Ts

ms.c: In function ‘main’:

ms.c:148:42: warning: ignoring return value of ‘scanf’, declared with attribute warn_unused_result [-Wunused-result]

  if( ntbs > 0 )  for( k=0; k<ntbs; k++)  scanf(" %s", tbsparamstrs[k] );

sample_statsSFS.c:14:1: warning: return type defaults to ‘int’ [-Wimplicit-int]

 main(argc,argv)

sample_statsSFS.c: In function ‘main’:

sample_statsSFS.c:67:9: warning: implicit declaration of function ‘biggerlist’ [-Wimplicit-function-declaration]

         biggerlist(nsam,maxsites, list) ;

total 428

drwxr-x--- 6 user user     62 Feb 12 21:38 .

drwx------ 6 user user     29 Feb 12 21:38 ..

-rw-r----- 1 user user   6148 Nov  8 22:39 .DS_Store
-rw-r----- 1 user user    222 Nov  8 22:39 ._.DS_Store

-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_1

-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_2

-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_3

-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_4

-rwxr----- 1 user user    222 Mar  9  2003 ._clms

-rwxr----- 1 user user    222 Jan  5  2004 ._col1

-rw-r----- 1 user user    222 Nov  5  2006 ._dist3.c
-rw-r----- 1 user user    222 Nov  7  2006 ._microsat.c
-rw-r----- 1 user user    222 Mar  9  2003 ._migmat
-rw-r--r-- 1 user user    975 Nov  8 20:23 ._ms.c
-rw-r----- 1 user user    514 Nov  8 19:15 ._ms.h
-rw-r--r-- 1 user user    177 Oct 16 19:03 ._msdoc.pdf
-rw-r----- 1 user user    222 Mar  8  2003 ._params
-rw-r----- 1 user user    222 May  3  2007 ._rand1.c
-rw-r----- 1 user user    222 May  3  2007 ._rand1t.c
-rw-r----- 1 user user    222 May  3  2007 ._rand2.c
-rw-r----- 1 user user    222 May  3  2007 ._rand2t.c
-rw-r----- 1 user user    222 Mar  8  2003 ._readme
-rw-r----- 1 user user    222 May 31  2007 ._readms.output.R
-rw-r----- 1 user user    222 May 21  2007 ._sample_stats.c
-rw-r----- 1 user user    222 Feb  4  2010 ._seedms
-rw-r----- 1 user user    222 Mar  8  2003 ._stats.c
-rw-r----- 1 user user    958 Mar 22  2017 ._streec.c
-rw-r----- 1 user user    222 Mar  8  2003 ._tajd.c

drwxr-x--- 2 user user      6 Jan 11 07:17 R_example_1

drwxr-x--- 2 user user      6 Jan 11 07:17 R_example_2

drwxr-x--- 2 user user      6 Jan 11 07:17 R_example_3

drwxr-x--- 2 user user      8 Jan 11 07:17 R_example_4

-rwxr----- 1 user user     43 Mar  9  2003 clms

-rwxr----- 1 user user     23 Jan  5  2004 col1

-rw-r----- 1 user user   1531 Nov  5  2006 dist3.c
-rw-r----- 1 user user   2937 Nov  7  2006 microsat.c
-rw-r----- 1 user user     69 Mar  9  2003 migmat

-rwxr-xr-x 1 user user  53248 Feb 12 21:38 ms

-rw-r--r-- 1 user user  36485 Jan 12 12:46 ms.c
-rw-r----- 1 user user   1226 Nov  8 19:15 ms.h
-rw-r--r-- 1 user user    534 Jan 11 07:19 ms.out
-rw-r--r-- 1 user user 226165 Oct 16 19:03 msdoc.pdf
-rw-r--r-- 1 user user 110655 Jan 12 08:12 ort
-rw-r--r-- 1 user user   1050 Jan 11 15:28 out
-rw-r----- 1 user user     16 Mar  8  2003 params
-rw-r----- 1 user user   1225 May  3  2007 rand1.c
-rw-r----- 1 user user    821 May  3  2007 rand1t.c
-rw-r----- 1 user user    816 May  3  2007 rand2.c
-rw-r----- 1 user user    580 May  3  2007 rand2t.c
-rw-r----- 1 user user   1386 Mar  8  2003 readme
-rw-r----- 1 user user   3501 May 31  2007 readms.output.R
-rw-r----- 1 user user   4675 May 21  2007 sample_stats.c

-rwxr-xr-x 1 user user  17976 Feb 12 21:38 sample_statsSFS

-rw-r----- 1 user user   5723 Jan 11 16:19 sample_statsSFS.c
-rw-r----- 1 user user     17 Jan 12 13:23 seedms
-rw-r----- 1 user user   1751 Mar  8  2003 stats.c
-rw-r----- 1 user user  22032 Mar 22  2017 streec.c
-rw-r----- 1 user user   1858 Mar  8  2003 tajd.c
-rw-r--r-- 1 user user   1183 Jan 12 13:20 time2epoch.py
-rw-r--r-- 1 user user   2419 Jan 12 13:23 tst
-rw-r--r-- 1 user user     71 Jan 12 06:31 tst2
-rw-r--r-- 1 user user     49 Jan 12 08:06 tst2Ts

%sh
ms

Too few command line arguments
usage: ms nsam howmany 
  Options: 
	 -t theta   (this option and/or the next must be used. Theta = 4*N0*u )
	 -s segsites   ( fixed number of segregating sites)
	 -T          (Output gene tree.)
	 -F minfreq     Output only sites with freq of minor allele >= minfreq.
	 -r rho nsites     (rho here is 4Nc)
		 -c f track_len   (f = ratio of conversion rate to rec rate. tracklen is mean length.) 
			 if rho = 0.,  f = 4*N0*g, with g the gene conversion rate.
	 -G alpha  ( N(t) = N0*exp(-alpha*t) .  alpha = -log(Np/Nr)/t
	 -I npop n1 n2 ... [mig_rate] (all elements of mig matrix set to mig_rate/(npop-1) 
		 -m i j m_ij    (i,j-th element of mig matrix set to m_ij.)
		 -ma m_11 m_12 m_13 m_21 m_22 m_23 ...(Assign values to elements of migration matrix.)
		 -n i size_i   (popi has size set to size_i*N0 
		 -g i alpha_i  (If used must appear after -M option.)
	   The following options modify parameters at the time 't' specified as the first argument:
	 -eG t alpha  (Modify growth rate of all pop's.)
	 -eg t i alpha_i  (Modify growth rate of pop i.) 
	 -eM t mig_rate   (Modify the mig matrix so all elements are mig_rate/(npop-1)
	 -em t i j m_ij    (i,j-th element of mig matrix set to m_ij at time t )
	 -ema t npop  m_11 m_12 m_13 m_21 m_22 m_23 ...(Assign values to elements of migration matrix.)
	 -eN t size  (Modify pop sizes. New sizes = size*N0 ) 
	 -en t i size_i  (Modify pop size of pop i.  New size of popi = size_i*N0 .)
	 -es t i proportion  (Split: pop i -> pop-i + pop-npop, npop increases by 1.
		 proportion is probability that each lineage stays in pop-i. (p, 1-p are admixt. proport.
		 Size of pop npop is set to N0 and alpha = 0.0 , size and alpha of pop i are unchanged.
	 -ej t i j   ( Join lineages in pop i and pop j into pop j
		  size, alpha and M are unchanged.
	 -eA t popi num ( num Ancient samples from popi at time t
	  -f filename     ( Read command line arguments from file filename.)
	  -p n ( Specifies the precision of the position output.  n is the number of digits after the decimal.)
	  -a (Print out allele ages.)
 See msdoc.pdf for explanation of these parameters.
ms 

To get SFS from ms output do as follows

n=15; NumLoci=100; TrueTheta=10; # make sure these agree with `ms n NumLoci -t TrueTheta` in the `%sh mc .. ` cell below
N0=1.0; TrueG=0.0; TrueBeta=0;

%sh ms 15 100 -t 10.0 | sample_statsSFS > output

%sh
pwd
head -n 2 output

/home/user/msdir
10 4 6 1 0 8 0 0 2 0 0 0 0 0
23 1 0 1 2 0 0 0 0 0 0 0 0 2

msSfsDataRaw = np.genfromtxt('/home/user/msdir/output', delimiter=' ')
msSfsMultiLocusData = [np.insert( np.insert(a, -1, 0., axis=0), 0, 0., axis=0) for a in msSfsDataRaw]
#msSfsMultiLocusData
# the Watterson estimator of theta under panmictic Kingman prior
print (mean([np.sum(msSfsMultiLocusData[j]) for j in range(0,NumLoci)])) / (sum(np.array([1.0/i for i in range(1,n,1)])))

9.86602647531

TrueBeta=0;
TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]

#print sfsMultiLocusData
print (mean([np.sum(msSfsMultiLocusData[j]) for j in range(0,NumLoci)])) / (sum(np.array([1.0/i for i in range(1,n,1)])))

10.1120620483

# dataset 1G (REPEATED via ms for testing purposes) - KINGMAN'S COALESCENT WITH NO GROWTH

# Likelihood based on the full list of SFS
set_random_seed(9811545)
np.random.seed(int(918675))

BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0];
ThetaList=[1,10,100];
results=[]
Reps=20; # number of particles
for g in GrowthList:
    #GrRates = GetRatesForGrowth(g,n,1000)
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!!
            #print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta)
            print([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
# from multiple replicate datasets
#[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
# [20, 15, 100,      0,        0.0,   10,        0.0,     10,       0.0,  -5093.2238698216834]
# [20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5147.4560231561763]

[20, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 1, 0.000000000000000, -8575.5514680327287]
[20, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 10, 0.000000000000000, -5217.8312013003724]
[20, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 100, 0.000000000000000, -7626.3498637938847]
[20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 1, 0.000000000000000, -8479.243014885189]
[20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5147.4560231561763]
[20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 100, 0.000000000000000, -7780.1337279236359]
[20, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 1, 0.000000000000000, -8562.8114291459169]
[20, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 10, 0.000000000000000, -5268.6403181905471]
[20, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 100, 0.000000000000000, -8040.3624073076298]
[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5147.4560231561763]

# dataset 1G (REPEATED via ms for testing purposes) - KINGMAN'S COALESCENT WITH NO GROWTH

# Likelihood based on the full list of SFS
set_random_seed(9871545)
np.random.seed(int(918675))

BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0];
ThetaList=[1,10,100];
results=[]
Reps=100; # number of particles
for g in GrowthList:
    #GrRates = GetRatesForGrowth(g,n,1000)
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!!
            #print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta)
            print([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
# from multiple replicate datasets
#[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
# [20, 15, 100,      0,        0.0,   10,        0.0,     10,       0.0,  -5093.2238698216834]
# [100, 15, 100,     0,        0.0,   10,        0.0,     10,       0.0,  -4719.0559858475472]

[100, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 1, 0.000000000000000, -8084.2905567486741]
[100, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 10, 0.000000000000000, -4803.2402300140157]
[100, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 100, 0.000000000000000, -7226.7640817544534]
[100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 1, 0.000000000000000, -8034.6893234419967]
[100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -4719.0559858475472]
[100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -4719.0559858475472]
[100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 100, 0.000000000000000, -7344.1201747338246]
[100, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 1, 0.000000000000000, -8187.6802601310701]
[100, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 10, 0.000000000000000, -4881.8177457366373]
[100, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 100, 0.000000000000000, -7583.3041235116634]
[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
[100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -4719.0559858475472]

Reps=800; beta=0.0; theta=10.0; growth=0.0;
GrRates=[0.,0.];
for i in range(2,n+1,1):
    GrRates.append(1.0*binomial(i,2))
print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]")
for i in range(5):
    logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps)
    print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
'''
[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]
[10, 15, 100, 0.000000000000000, 10.0000000000000, -5458.7347191116787]
[10, 15, 100, 0.000000000000000, 10.0000000000000, -5254.6231241170144]
[10, 15, 100, 0.000000000000000, 10.0000000000000, -5297.870156684322]
[10, 15, 100, 0.000000000000000, 10.0000000000000, -5396.4229009903611]
[10, 15, 100, 0.000000000000000, 10.0000000000000, -5294.8534748499615]

[100, 15, 100, 0.000000000000000, 10.0000000000000, -4637.1829885643956]
[100, 15, 100, 0.000000000000000, 10.0000000000000, -4575.514309145261]
[100, 15, 100, 0.000000000000000, 10.0000000000000, -4612.6721939365625]
[100, 15, 100, 0.000000000000000, 10.0000000000000, -4643.4324787908445]
[100, 15, 100, 0.000000000000000, 10.0000000000000, -4647.9556338215662]

[200, 15, 100, 0.000000000000000, 10.0000000000000, -4542.2875568237114]
[200, 15, 100, 0.000000000000000, 10.0000000000000, -4532.9762385300965]
[200, 15, 100, 0.000000000000000, 10.0000000000000, -4538.9995541760582]
[200, 15, 100, 0.000000000000000, 10.0000000000000, -4518.7981532977683]
[200, 15, 100, 0.000000000000000, 10.0000000000000, -4464.6882092921878]

[400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115]

[800, 15, 100, 0.000000000000000, 10.0000000000000, -4253.587709948677]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4285.4570525672298]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4229.0499889288321]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4306.3893655985867]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4315.2945672085107]
'''

[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4253.587709948677]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4285.4570525672298]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4229.0499889288321]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4306.3893655985867]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4315.2945672085107]
'\n[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5458.7347191116787]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5254.6231241170144]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5297.870156684322]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5396.4229009903611]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5294.8534748499615]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4637.1829885643956]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4575.514309145261]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4612.6721939365625]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4643.4324787908445]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4647.9556338215662]\n\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4542.2875568237114]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4532.9762385300965]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4538.9995541760582]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4518.7981532977683]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4464.6882092921878]\n\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115]\n'

Reps=400
for i in range(5):
    logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps)
    print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])

[400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115]

# dataset 1G (REPEATED via ms for testing purposes) - KINGMAN'S COALESCENT WITH NO GROWTH

# Likelihood based on the full list of SFS
set_random_seed(987545)
np.random.seed(int(18675))

BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,1.0,10.0];
ThetaList=[1,10,100];
results=[]
Reps=20; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!!
            print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

Stochastic Global Optimisation for Point Estimation

https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.differential_evolution.html

%time
# using same SFS data as above with standard coalescent
from scipy.optimize import differential_evolution
Reps=100; # number of particles with 100 is taking too long in this naive implementation, but REPS=20 has too much vriance in likelihood estimate!
parametersAndLogLkl=[]
for g in [0]:
    #GrRates = GetRatesForGrowth(g,n,1000) # slow - when g=0 we can do it analytically as below
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    #logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps)
    def negLkl(BT):
        f = -LklOfBetaThetaGrowthRates(RR(BT[0]),RR(BT[1]/4.0),g,GrRates,n,msSfsMultiLocusData,Reps);
        parametersAndLogLkl.append((BT[0],BT[1], -f)) # track the parameters for each likihood evaluation
        #print str(BT[0])+" , "+str(BT[1])+" : "+str(f)
        return f #negLogLkl_SplitPairCounts2(spc,B)

    bounds = [(-0.99,20.0),(1.0,100.0)]

    def mycallback(xk, convergence):
        print "mycallback "+str(xk)+" , "+ str(convergence)
        return convergence>1.0

    result = differential_evolution(negLkl, bounds, disp=True, popsize=15, callback=mycallback, recombination=0.7, maxiter=100)
    result.x, result.fun
#### see results in markdown for various runs of the same dataset

differential_evolution step 1: f(x)= 4840.34
mycallback [-0.07549672  8.09048459] , 0.0799216743708
differential_evolution step 2: f(x)= 4717.08
mycallback [  1.27948965  11.49985163] , 0.0890399767062
differential_evolution step 3: f(x)= 4717.08
mycallback [  1.27948965  11.49985163] , 0.117261285127
differential_evolution step 4: f(x)= 4717.08

REPS=100
differential_evolution step 1: f(x)= 4840.34
mycallback [-0.07549672  8.09048459] , 0.0799216743708
differential_evolution step 2: f(x)= 4717.08
mycallback [  1.27948965  11.49985163] , 0.0890399767062
differential_evolution step 3: f(x)= 4717.08
mycallback [  1.27948965  11.49985163] , 0.117261285127
differential_evolution step 4: f(x)= 4717.08

With just 10 particles (REPS=10) there is too much uncertainty in likelihood estimates for stable convergence of the stochastic optimisation algorithm.

REPS=10
differential_evolution step 1: f(x)= 5501.14
mycallback [  6.67340526  14.94882129] , 0.0856814618557
differential_evolution step 2: f(x)= 5454.21
mycallback [ 19.73690787  11.70403212] , 0.102796928814
differential_evolution step 3: f(x)= 5386.65
mycallback [  3.73906694  15.47908072] , 0.198542855185
differential_evolution step 4: f(x)= 5386.65
mycallback [  3.73906694  15.47908072] , 0.260265815629
differential_evolution step 5: f(x)= 5375.97differential_evolution step 1: f(x)= 5501.14
mycallback [  6.67340526  14.94882129] , 0.0856814618557
differential_evolution step 2: f(x)= 5454.21
mycallback [ 19.73690787  11.70403212] , 0.102796928814
differential_evolution step 3: f(x)= 5386.65
mycallback [  3.73906694  15.47908072] , 0.198542855185
differential_evolution step 4: f(x)= 5386.65
mycallback [  3.73906694  15.47908072] , 0.260265815629
differential_evolution step 5: f(x)= 5375.97
mycallback [  5.91624121  14.41968427] , 1.01879779978
(array([  5.91624121,  14.41968427]), 5375.9717345313738)

CPU time: 5724.49 s, Wall time: 5794.37 s

%time
from scipy.optimize import rosen, differential_evolution
Reps=100; # number of particles with 100 is taking too long in this naive implementation, but REPS=20 has too much vriance in likelihood estimate!
parametersAndLogLkl=[]
for g in [0]:
    #GrRates = GetRatesForGrowth(g,n,1000) # slow - when g=0 we can do it analytically as below
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    #logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps)
    def negLkl(BT):
        f = -LklOfBetaThetaGrowthRates(RR(BT[0]),RR(BT[1]/4.0),g,GrRates,n,msSfsMultiLocusData,Reps);
        parametersAndLogLkl.append((BT[0],BT[1], -f)) # track the parameters for each likihood evaluation
        #print str(BT[0])+" , "+str(BT[1])+" : "+str(f)
        return f #negLogLkl_SplitPairCounts2(spc,B)

    bounds = [(-0.99,20.0),(1.0,150.0)]

    def mycallback(xk, convergence):
        print "mycallback "+str(xk)+" , "+ str(convergence)
        return convergence>1.0

    result = differential_evolution(negLkl, bounds, disp=True, popsize=25, callback=mycallback, recombination=0.7, maxiter=100)
    # Reps=10 : popsize=15, recombination=0.7, maxiter=10 (array([  5.89973178,  61.12517964]), 24876.259063730031)
    # Reps =10; beta, mu, g, lkl, logLkl =  8.27364821134604  , 17.2526514877720  , 0.000000000000000  , 0.0  , -24674.2850081
    #bounds = [(0,2), (0, 2)]
    #result = differential_evolution(rosen, bounds)
    result.x, result.fun
#### see results in markdown for various runs of the same dataset

from scipy.optimize import rosen, differential_evolution
# this is very slow even with 10 particles .... :( but gets the ball-park answer betaHat,thetaHat, -logLkl = [ -0.02068417,  13.1918302 ]), 5217.4830580740918
Reps=2; # number of particles with 100 is taking too long in this naive implementation
for g in GrowthList:
    #GrRates = GetRatesForGrowth(g,n,1000) # slow - when g=0 we can do it analytically as below
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    #logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps)
    def negLkl(BT):
        f = -LklOfBetaThetaGrowthRates(RR(BT[0]),RR(BT[1]/4.0),g,GrRates,n,msSfsMultiLocusData,Reps)
        #print BT[0],BT[1],f
        return f #negLogLkl_SplitPairCounts2(spc,B)

    bounds = [(-0.99,10.0),(1.0,50.0)]
    result = differential_evolution(negLkl, bounds, disp=True, popsize=10,maxiter=2)
    #bounds = [(0,2), (0, 2)]
    #result = differential_evolution(rosen, bounds)
    result.x, result.fun

beta, mu, g, lkl, logLkl =  0.192638876372166  , 3.10305863202379  , 0.000000000000000  , 0.0  , -5959.36028135
beta, mu, g, lkl, logLkl =  2.67132512883625  , 12.4986014807133  , 0.000000000000000  , 0.0  , -7372.41751058
beta, mu, g, lkl, logLkl =  7.49799987485964  , 1.97524202923657  , 0.000000000000000  , 0.0  , -6026.14589091
beta, mu, g, lkl, logLkl =  -0.0285043371578597  , 7.46666808400616  , 0.000000000000000  , 0.0  , -6685.24590411
beta, mu, g, lkl, logLkl =  3.34795982225950  , 11.6497056655876  , 0.000000000000000  , 0.0  , -7278.767204
beta, mu, g, lkl, logLkl =  2.23636457002183  , 8.67266627919945  , 0.000000000000000  , 0.0  , -6876.02919136
beta, mu, g, lkl, logLkl =  3.78436729022016  , 6.09873216236385  , 0.000000000000000  , 0.0  , -6412.68424573
beta, mu, g, lkl, logLkl =  8.50757204007902  , 9.92991203928796  , 0.000000000000000  , 0.0  , -7113.78218942
beta, mu, g, lkl, logLkl =  6.06243292729823  , 0.548945366636503  , 0.000000000000000  , 0.0  , -7565.39248692
beta, mu, g, lkl, logLkl =  4.68965901099694  , 3.66388200572125  , 0.000000000000000  , 0.0  , -6246.55179671
beta, mu, g, lkl, logLkl =  4.35688866699788  , 10.6428001005425  , 0.000000000000000  , 0.0  , -7110.14703741
beta, mu, g, lkl, logLkl =  9.16936331214881  , 6.72651133507723  , 0.000000000000000  , 0.0  , -6544.03642779
beta, mu, g, lkl, logLkl =  9.85781452108139  , 1.30484065019712  , 0.000000000000000  , 0.0  , -6328.42461171
beta, mu, g, lkl, logLkl =  0.816395524051414  , 2.36332553286671  , 0.000000000000000  , 0.0  , -6016.30390969
beta, mu, g, lkl, logLkl =  7.21413656472797  , 9.31191598328838  , 0.000000000000000  , 0.0  , -7082.62897066
beta, mu, g, lkl, logLkl =  6.33536935538389  , 10.9597156910161  , 0.000000000000000  , 0.0  , -7149.73061066
beta, mu, g, lkl, logLkl =  8.27055336661729  , 4.97902583659657  , 0.000000000000000  , 0.0  , -6311.30477347
beta, mu, g, lkl, logLkl =  1.35918783360545  , 7.96286047667110  , 0.000000000000000  , 0.0  , -6628.6018027
beta, mu, g, lkl, logLkl =  -0.466719496508359  , 5.57985090985656  , 0.000000000000000  , 0.0  , -6575.92260353
beta, mu, g, lkl, logLkl =  5.18711156639093  , 4.37825362756296  , 0.000000000000000  , 0.0  , -6322.40115164
beta, mu, g, lkl, logLkl =  1.67494249329244  , 9.67447060439608  , 0.000000000000000  , 0.0  , -6985.4346397
beta, mu, g, lkl, logLkl =  7.69490521727455  , 0.689216967151480  , 0.000000000000000  , 0.0  , -7275.37049679
beta, mu, g, lkl, logLkl =  3.49039227568789  , 10.3317546849555  , 0.000000000000000  , 0.0  , -7087.19841019
beta, mu, g, lkl, logLkl =  6.92038038979015  , 5.26746915289674  , 0.000000000000000  , 0.0  , -6313.42500847
beta, mu, g, lkl, logLkl =  6.95244948857194  , 3.45260834656504  , 0.000000000000000  , 0.0  , -6130.01887935
beta, mu, g, lkl, logLkl =  -0.503205832268426  , 0.397528088764742  , 0.000000000000000  , 0.0  , -8442.41312608
beta, mu, g, lkl, logLkl =  3.78436729022016  , 1.35255462808006  , 0.000000000000000  , 0.0  , -6428.62478199
beta, mu, g, lkl, logLkl =  4.67615534518670  , 3.67883562201361  , 0.000000000000000  , 0.0  , -6209.3237204
beta, mu, g, lkl, logLkl =  6.06243292729823  , 8.23010766628102  , 0.000000000000000  , 0.0  , -6811.31764995
beta, mu, g, lkl, logLkl =  2.73429535133285  , 1.56861914031829  , 0.000000000000000  , 0.0  , -6179.49330062
beta, mu, g, lkl, logLkl =  2.96991796963890  , 3.64417865440990  , 0.000000000000000  , 0.0  , -6251.37216326
beta, mu, g, lkl, logLkl =  3.96307698196270  , 6.72651133507723  , 0.000000000000000  , 0.0  , -6590.47598199
beta, mu, g, lkl, logLkl =  5.16361988756579  , 10.8458915475144  , 0.000000000000000  , 0.0  , -7221.92190556
beta, mu, g, lkl, logLkl =  1.40874977574457  , 2.84325557950820  , 0.000000000000000  , 0.0  , -5991.35443199
beta, mu, g, lkl, logLkl =  0.00365597770044435  , 2.30213555712210  , 0.000000000000000  , 0.0  , -6134.61393871
beta, mu, g, lkl, logLkl =  8.71089382474266  , 1.40613546192693  , 0.000000000000000  , 0.0  , -6361.96423749
beta, mu, g, lkl, logLkl =  7.18125606152488  , 4.97902583659657  , 0.000000000000000  , 0.0  , -6309.35420598
beta, mu, g, lkl, logLkl =  1.57668630043092  , 7.96286047667110  , 0.000000000000000  , 0.0  , -6765.12718023
beta, mu, g, lkl, logLkl =  -0.744870852752415  , 9.24946143904115  , 0.000000000000000  , 0.0  , -7133.00470978
beta, mu, g, lkl, logLkl =  5.18711156639093  , 4.55582647046258  , 0.000000000000000  , 0.0  , -6297.25132715
differential_evolution step 1: f(x)= 5959.36
beta, mu, g, lkl, logLkl =  0.192638876372166  , 5.33506215577862  , 0.000000000000000  , 0.0  , -6233.43365021
beta, mu, g, lkl, logLkl =  1.66364081833354  , 5.11887655987214  , 0.000000000000000  , 0.0  , -6262.96622871
beta, mu, g, lkl, logLkl =  0.338538343347064  , 4.38826218742826  , 0.000000000000000  , 0.0  , -6367.89190528
beta, mu, g, lkl, logLkl =  3.14236397409478  , 3.35484287574148  , 0.000000000000000  , 0.0  , -6104.87506939
beta, mu, g, lkl, logLkl =  5.86812039065937  , 5.84266857414679  , 0.000000000000000  , 0.0  , -6520.56924811
beta, mu, g, lkl, logLkl =  2.23636457002183  , 0.827913436542197  , 0.000000000000000  , 0.0  , -6952.95971211
beta, mu, g, lkl, logLkl =  7.25750359508364  , 3.99293525185519  , 0.000000000000000  , 0.0  , -6087.1047052
beta, mu, g, lkl, logLkl =  3.86074216660030  , 5.37814437768196  , 0.000000000000000  , 0.0  , -6231.30999424
beta, mu, g, lkl, logLkl =  3.81423543974023  , 3.81494975942578  , 0.000000000000000  , 0.0  , -6042.67108275
beta, mu, g, lkl, logLkl =  3.69353766263250  , 2.46897426036644  , 0.000000000000000  , 0.0  , -6142.37195855
beta, mu, g, lkl, logLkl =  1.11747214582904  , 4.39525471785516  , 0.000000000000000  , 0.0  , -6180.68009054
beta, mu, g, lkl, logLkl =  -0.145170418590930  , 4.01785603039970  , 0.000000000000000  , 0.0  , -6256.13525601
beta, mu, g, lkl, logLkl =  9.85781452108139  , 3.04252144682377  , 0.000000000000000  , 0.0  , -6092.50468237
beta, mu, g, lkl, logLkl =  1.45813066782271  , 6.55180558519940  , 0.000000000000000  , 0.0  , -6667.19188988
beta, mu, g, lkl, logLkl =  3.69548178858569  , 2.58566058385317  , 0.000000000000000  , 0.0  , -6176.88539186
beta, mu, g, lkl, logLkl =  2.36042425504495  , 2.71042836372860  , 0.000000000000000  , 0.0  , -6025.08222264
beta, mu, g, lkl, logLkl =  -0.722998687542751  , 4.19536282629316  , 0.000000000000000  , 0.0  , -6278.88214632
beta, mu, g, lkl, logLkl =  1.35918783360545  , 4.36786205432543  , 0.000000000000000  , 0.0  , -6252.91619478
beta, mu, g, lkl, logLkl =  3.99384188106299  , 1.62152859537633  , 0.000000000000000  , 0.0  , -6417.61327055
beta, mu, g, lkl, logLkl =  4.56751725155766  , 4.55582647046258  , 0.000000000000000  , 0.0  , -6119.39907965
differential_evolution step 2: f(x)= 5959.36
beta, mu, g, lkl, logLkl =  0.192638876372166  , 3.10305863202379  , 0.000000000000000  , 0.0  , -6131.56057775
beta, mu, g, lkl, logLkl =  0.192638886372166  , 3.10305863202379  , 0.000000000000000  , 0.0  , -6078.25877917
beta, mu, g, lkl, logLkl =  0.192638876372166  , 3.10305863452379  , 0.000000000000000  , 0.0  , -6284.41688891
beta, mu, g, lkl, logLkl =  10.0000000000000  , 0.250000000000000  , 0.000000000000000  , 0.0  , -9137.76313061
beta, mu, g, lkl, logLkl =  10.0000000100000  , 0.250000000000000  , 0.000000000000000  , 0.0  , -9149.80345139
beta, mu, g, lkl, logLkl =  10.0000000000000  , 0.250000002500000  , 0.000000000000000  , 0.0  , -9309.20554021
beta, mu, g, lkl, logLkl =  2.43072387055414  , 2.45197750720723  , 0.000000000000000  , 0.0  , -6126.73018708
beta, mu, g, lkl, logLkl =  2.43072388055414  , 2.45197750720723  , 0.000000000000000  , 0.0  , -5983.82942447
beta, mu, g, lkl, logLkl =  2.43072387055414  , 2.45197750970723  , 0.000000000000000  , 0.0  , -6263.1118694
beta, mu, g, lkl, logLkl =  0.613854406619796  , 2.98052285615436  , 0.000000000000000  , 0.0  , -6088.00262364
beta, mu, g, lkl, logLkl =  0.613854416619796  , 2.98052285615436  , 0.000000000000000  , 0.0  , -6036.85110754
beta, mu, g, lkl, logLkl =  0.613854406619796  , 2.98052285865436  , 0.000000000000000  , 0.0  , -5986.87893203
beta, mu, g, lkl, logLkl =  0.354165950124406  , 3.05606880405018  , 0.000000000000000  , 0.0  , -6026.5351121
beta, mu, g, lkl, logLkl =  0.354165960124406  , 3.05606880405018  , 0.000000000000000  , 0.0  , -6062.35359389
beta, mu, g, lkl, logLkl =  0.354165950124406  , 3.05606880655018  , 0.000000000000000  , 0.0  , -6172.25697922
beta, mu, g, lkl, logLkl =  0.233351673085460  , 3.09121487562159  , 0.000000000000000  , 0.0  , -6060.43865358
beta, mu, g, lkl, logLkl =  0.233351683085460  , 3.09121487562159  , 0.000000000000000  , 0.0  , -6051.7120833
beta, mu, g, lkl, logLkl =  0.233351673085460  , 3.09121487812159  , 0.000000000000000  , 0.0  , -6019.3662997
beta, mu, g, lkl, logLkl =  0.207377943213967  , 3.09877089127978  , 0.000000000000000  , 0.0  , -6109.53214353
beta, mu, g, lkl, logLkl =  0.207377953213967  , 3.09877089127978  , 0.000000000000000  , 0.0  , -6125.10610218
beta, mu, g, lkl, logLkl =  0.207377943213967  , 3.09877089377978  , 0.000000000000000  , 0.0  , -6155.02525146
beta, mu, g, lkl, logLkl =  0.197165171051696  , 3.10174188800430  , 0.000000000000000  , 0.0  , -6245.20614406
beta, mu, g, lkl, logLkl =  0.197165181051696  , 3.10174188800430  , 0.000000000000000  , 0.0  , -6104.44659838
beta, mu, g, lkl, logLkl =  0.197165171051696  , 3.10174189050430  , 0.000000000000000  , 0.0  , -6082.23437022
beta, mu, g, lkl, logLkl =  0.194314419637385  , 3.10257119986789  , 0.000000000000000  , 0.0  , -6094.10604157
beta, mu, g, lkl, logLkl =  0.194314429637385  , 3.10257119986789  , 0.000000000000000  , 0.0  , -6019.46527039
beta, mu, g, lkl, logLkl =  0.194314419637385  , 3.10257120236789  , 0.000000000000000  , 0.0  , -6125.24844111
beta, mu, g, lkl, logLkl =  0.193080660132688  , 3.10293011274572  , 0.000000000000000  , 0.0  , -6272.78116254
beta, mu, g, lkl, logLkl =  0.193080670132688  , 3.10293011274572  , 0.000000000000000  , 0.0  , -6126.20077927
beta, mu, g, lkl, logLkl =  0.193080660132688  , 3.10293011524572  , 0.000000000000000  , 0.0  , -6054.71655418
beta, mu, g, lkl, logLkl =  0.192822702475048  , 3.10300515518706  , 0.000000000000000  , 0.0  , -6106.41518998
beta, mu, g, lkl, logLkl =  0.192822712475048  , 3.10300515518706  , 0.000000000000000  , 0.0  , -5987.8044366
beta, mu, g, lkl, logLkl =  0.192822702475048  , 3.10300515768706  , 0.000000000000000  , 0.0  , -6150.26753875
beta, mu, g, lkl, logLkl =  0.192682158303393  , 3.10304604088068  , 0.000000000000000  , 0.0  , -6182.05215543
beta, mu, g, lkl, logLkl =  0.192682168303393  , 3.10304604088068  , 0.000000000000000  , 0.0  , -6081.14160241
beta, mu, g, lkl, logLkl =  0.192682158303393  , 3.10304604338068  , 0.000000000000000  , 0.0  , -6057.26764101
beta, mu, g, lkl, logLkl =  0.192654742319382  , 3.10305401646237  , 0.000000000000000  , 0.0  , -6259.96931151
beta, mu, g, lkl, logLkl =  0.192654752319382  , 3.10305401646237  , 0.000000000000000  , 0.0  , -6226.55881597
beta, mu, g, lkl, logLkl =  0.192654742319382  , 3.10305401896237  , 0.000000000000000  , 0.0  , -6240.92287717
beta, mu, g, lkl, logLkl =  0.192644034400764  , 3.10305713150211  , 0.000000000000000  , 0.0  , -6161.27725395
beta, mu, g, lkl, logLkl =  0.192644044400764  , 3.10305713150211  , 0.000000000000000  , 0.0  , -6178.52908633
beta, mu, g, lkl, logLkl =  0.192644034400764  , 3.10305713400211  , 0.000000000000000  , 0.0  , -6189.52660829
beta, mu, g, lkl, logLkl =  0.192640537825439  , 3.10305814869055  , 0.000000000000000  , 0.0  , -6181.86672518
beta, mu, g, lkl, logLkl =  0.192640547825439  , 3.10305814869055  , 0.000000000000000  , 0.0  , -6021.01092336
beta, mu, g, lkl, logLkl =  0.192640537825439  , 3.10305815119055  , 0.000000000000000  , 0.0  , -6030.10581767
beta, mu, g, lkl, logLkl =  0.192639447831879  , 3.10305846578049  , 0.000000000000000  , 0.0  , -6017.14996387
beta, mu, g, lkl, logLkl =  0.192639457831879  , 3.10305846578049  , 0.000000000000000  , 0.0  , -6139.22015951
beta, mu, g, lkl, logLkl =  0.192639447831879  , 3.10305846828049  , 0.000000000000000  , 0.0  , -6019.72745884
beta, mu, g, lkl, logLkl =  0.192639050147600  , 3.10305846295499  , 0.000000000000000  , 0.0  , -6048.12201106
beta, mu, g, lkl, logLkl =  0.192639060147600  , 3.10305846295499  , 0.000000000000000  , 0.0  , -6290.14258722
beta, mu, g, lkl, logLkl =  0.192639050147600  , 3.10305846545499  , 0.000000000000000  , 0.0  , -6232.48324842
beta, mu, g, lkl, logLkl =  0.192639388774753  , 3.10305846536090  , 0.000000000000000  , 0.0  , -6131.48137252
beta, mu, g, lkl, logLkl =  0.192639398774753  , 3.10305846536090  , 0.000000000000000  , 0.0  , -6088.72194232
beta, mu, g, lkl, logLkl =  0.192639388774753  , 3.10305846786090  , 0.000000000000000  , 0.0  , -6106.59784861
beta, mu, g, lkl, logLkl =  0.192639430401962  , 3.10305846565666  , 0.000000000000000  , 0.0  , -5980.92387623
beta, mu, g, lkl, logLkl =  0.192639440401962  , 3.10305846565666  , 0.000000000000000  , 0.0  , -6381.52342578
beta, mu, g, lkl, logLkl =  0.192639430401962  , 3.10305846815666  , 0.000000000000000  , 0.0  , -6252.54723446
beta, mu, g, lkl, logLkl =  0.192639418026174  , 3.10305846556873  , 0.000000000000000  , 0.0  , -6133.75725714
beta, mu, g, lkl, logLkl =  0.192639428026174  , 3.10305846556873  , 0.000000000000000  , 0.0  , -6049.16364513
beta, mu, g, lkl, logLkl =  0.192639418026174  , 3.10305846806873  , 0.000000000000000  , 0.0  , -5971.18672324
beta, mu, g, lkl, logLkl =  0.192639427615362  , 3.10305846563686  , 0.000000000000000  , 0.0  , -6128.07517595
beta, mu, g, lkl, logLkl =  0.192639437615362  , 3.10305846563686  , 0.000000000000000  , 0.0  , -5994.94295566
beta, mu, g, lkl, logLkl =  0.192639427615362  , 3.10305846813686  , 0.000000000000000  , 0.0  , -6167.57086263
beta, mu, g, lkl, logLkl =  0.192639430127376  , 3.10305846565470  , 0.000000000000000  , 0.0  , -6339.1254703
beta, mu, g, lkl, logLkl =  0.192639440127376  , 3.10305846565470  , 0.000000000000000  , 0.0  , -5981.17374362
beta, mu, g, lkl, logLkl =  0.192639430127376  , 3.10305846815471  , 0.000000000000000  , 0.0  , -6157.72285945
beta, mu, g, lkl, logLkl =  0.192639430401962  , 3.10305846565666  , 0.000000000000000  , 0.0  , -6083.80831441
beta, mu, g, lkl, logLkl =  0.192639440401962  , 3.10305846565666  , 0.000000000000000  , 0.0  , -6173.33870907
beta, mu, g, lkl, logLkl =  0.192639430401962  , 3.10305846815666  , 0.000000000000000  , 0.0  , -6051.74194185
(array([  0.19263888,  12.41223453]), 5959.3602813466023)

If the above is run long enough it goes to (0.0,10.0).

A Complex Demographic Structured Sample

We will generates SFS using Hudson's ms using the scheme described by Fig. 3 on Page 19 of msdoc.pdf and estimate the effective beta-splitting population size.

n=15; NumLoci=100; TrueTheta=10.0; # make sure these agree with `ms n NumLoci -t TrueTheta ...` in the `%sh mc .. ` cell below
N0=1.0; TrueG=0.0; TrueBeta=0.0; # actually TrueBeta is unknown or not applicable

%sh
#ms 30 20 -t 10.0 -I 6 0 14 0 0 16 0 -m 1 2 2.5 -m 2 1 2.5 -m 2 3 2.5 -m 3 2 2.5 -m 4 5 2.5 -m 5 4 2.5 -m 5 6 2.5 -m 6 5 2.5 -em 2.0 3 4 2.5 -em 2.0 4 3 2.5 | sample_statsSFS > output2

%sh head -n 4 /home/user/msdir/output2

26 16 54 22 0 14 0 0 71 0 0 0 0
13 61 28 2 0 5 31 0 0 0 34 0 0
23 2 2 2 6 42 53 0 0 0 0 0 0
2 12 2 0 6 100 86 0 0 0 0 0 0

%sh
ms 15 100 -t 10.0 -I 6 0 7 0 0 8 0 -m 1 2 2.5 -m 2 1 2.5 -m 2 3 2.5 -m 3 2 2.5 -m 4 5 2.5 -m 5 4 2.5 -m 5 6 2.5 -m 6 5 2.5 -em 2.0 3 4 2.5 -em 2.0 4 3 2.5 | sample_statsSFS > output2

msSfsDataRaw = np.genfromtxt('/home/user/msdir/output2', delimiter=' ')
msSfsMultiLocusData = [np.insert( np.insert(a, -1, 0., axis=0), 0, 0., axis=0) for a in msSfsDataRaw]
#msSfsMultiLocusData
# the Watterson estimator of theta under panmictic Kingman prior
print (mean([np.sum(msSfsMultiLocusData[j]) for j in range(0,NumLoci)])) / (sum(np.array([1.0/i for i in range(1,n,1)])))

77.8733342835

# Likelihood based on the full list of SFS
# dataset Hudson's Fig 3 - fitting with Beta-splitting Effective Population Size
set_random_seed(9842245)
np.random.seed(int(4115345))

BetaList=[-1.0,0.0,5.0,10.0];
GrowthList=[0.0];
ThetaList=[1,10,100,1000];
results=[]
Reps=1000; # number of particles
for g in GrowthList:
    #GrRates = GetRatesForGrowth(g,n,1000)
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!!
            #print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta)
            print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
            results.append([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][5]>results[MaxI][5]:
       MaxI=i
results[MaxI]
#[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]
#[20, 30, 20, 5.00000000000000, 100, -4912.0378080548035]
#[100, 30, 20, 5.00000000000000, 100, -4699.5032281274443]

Results of a run with the complex Fig.3 for msdoc.pdf chooses a more balanced beta and larger theta.

[1000, 15, 100, -1.00000000000000, 1, -inf]
[1000, 15, 100, -1.00000000000000, 10, -12061.540375880273]
[1000, 15, 100, -1.00000000000000, 100, -6100.9810259876103]
[1000, 15, 100, -1.00000000000000, 1000, -11881.563131517965]
[1000, 15, 100, 0.000000000000000, 1, -inf]
[1000, 15, 100, 0.000000000000000, 10, -11864.359615167663]
[1000, 15, 100, 0.000000000000000, 100, -5795.9160992013267]
[1000, 15, 100, 0.000000000000000, 1000, -11594.056550711328]
[1000, 15, 100, 5.00000000000000, 1, -inf]
[1000, 15, 100, 5.00000000000000, 10, -11736.957816906755]
[1000, 15, 100, 5.00000000000000, 100, -5734.6545990340501]
[1000, 15, 100, 5.00000000000000, 1000, -11523.970285428481]
[1000, 15, 100, 10.0000000000000, 1, -inf]
[1000, 15, 100, 10.0000000000000, 10, -11734.261194892686]
[1000, 15, 100, 10.0000000000000, 100, -5812.3707795154705]
[1000, 15, 100, 10.0000000000000, 1000, -11545.693530758796]
[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]
[1000, 15, 100, 5.00000000000000, 100, -5734.6545990340501]

There is variance in the likelihood estimates which decreases as a function of the number of particles.

[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]
[10, 30, 20, 5.00000000000000, 100.000000000000, -5532.3529474155057]
[10, 30, 20, 5.00000000000000, 100.000000000000, -5218.1004939060304]
[10, 30, 20, 5.00000000000000, 100.000000000000, -5287.3870924684761]
[10, 30, 20, 5.00000000000000, 100.000000000000, -5265.937482150388]
[10, 30, 20, 5.00000000000000, 100.000000000000, -5280.421730323982]

[100, 30, 20, 5.00000000000000, 100.000000000000, -4751.6168398897598]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4677.2325203708433]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4811.3257313336544]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4799.1290494359873]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4739.2569980055923]

[1000, 30, 20, 5.00000000000000, 100.000000000000, -4443.9865339510743]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4484.4008663487266]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4451.5259917605035]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4475.0765597760737]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4441.5184195452257]

r10=[-5532.3529474155057, -5218.1004939060304, -5287.3870924684761, -5265.937482150388, -5280.421730323982]
r100=[-4751.6168398897598, -4677.2325203708433, -4811.3257313336544, -4799.1290494359873, -4739.2569980055923]
r1000=[-4443.9865339510743, -4484.4008663487266, -4451.5259917605035, -4475.0765597760737, -4441.5184195452257]
print('num of particles, mean log lkh, std loglkh')
print(10,mean(r10),std(r10))
print(100,mean(r100),std(r100))
print(1000,mean(r1000),std(r1000))

num of particles, mean log lkh, std loglkh
(10, -5316.83994925288, 123.470797173940)
(100, -4755.712227807167, 53.44271101877941)
(1000, -4459.301674276321, 19.30074717347509)

Reps=100; beta=5.0; theta=100.0; growth=0.0;
print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]")
for i in range(5):
    logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps)
    print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])

[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4751.6168398897598]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4677.2325203708433]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4811.3257313336544]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4799.1290494359873]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4739.2569980055923]

Reps=1000; beta=5.0; theta=100.0; growth=0.0;
print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]")
for i in range(5):
    logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps)
    print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])

[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4443.9865339510743]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4484.4008663487266]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4451.5259917605035]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4475.0765597760737]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4441.5184195452257]

︠5b212958-4b2c-465f-95a4-4df0ea57aa52︠

UnfoldingSFS

Full Likelihood Inference from the Site Frequency Spectrum of a Non-recombining Locus

Raazesh Sainudiin and Amandine Véber

The Sampler

Simulating Data

Computing Probabilities and Likelihoods

Computing useful statistics

Producing data to test the algorithms under the exponential growth model

Sanity check: increased mutation rate (i.e., more SNPs) gives better estimates

Producing data to test the algorithms under bottleneck model

With small mutation rates

We don't detect non-existant bottlenecks

Simulating Complex Demographical and Structural Scenarios for $\beta$ -Effective Population Size $(\beta, N_e)$

To get SFS from ms output do as follows

Stochastic Global Optimisation for Point Estimation

A Complex Demographic Structured Sample

Product

Resources

Company

UnfoldingSFS

Full Likelihood Inference from the Site Frequency Spectrum of a Non-recombining Locus

Raazesh Sainudiin and Amandine Véber

The Sampler

Simulating Data

Computing Probabilities and Likelihoods

Computing useful statistics

Producing data to test the algorithms under the exponential growth model

Sanity check: increased mutation rate (i.e., more SNPs) gives better estimates

Producing data to test the algorithms under bottleneck model

With small mutation rates

We don't detect non-existant bottlenecks

Simulating Complex Demographical and Structural Scenarios for β\betaβ-Effective Population Size (β,Ne)(\beta, N_e)(β,Ne​)

To get SFS from ms output do as follows

Stochastic Global Optimisation for Point Estimation

A Complex Demographic Structured Sample

Simulating Complex Demographical and Structural Scenarios for $\beta$ -Effective Population Size $(\beta, N_e)$