SharedFullLikelihoodInferenceSFS.sagewsOpen in CoCalc
Full Likelihood Inference Procedure based on SFS

UnfoldingSFS

Full Likelihood Inference from the Site Frequency Spectrum of a Non-recombining Locus

Raazesh Sainudiin and Amandine Véber

This is a public code accompanying the research article:

The Sampler

%auto
import pylab
import numpy as np

# set random seed of the random number generator
set_random_seed(0)
np.random.seed(int(1235))

# updating the topology

def DProduct(j,betaSpl):
    '''compute the product terms appearing in the probability of a split'''
    if j<2:
        prod=1
    else:
        x=j+betaSpl
        prod=x
        for k in range(1,j-1):
            prod= prod*(x-k)
    return prod

def BetaSplit(betaSpl,m,J):
    '''indicator of m being split into J(>= m/2) and m-J conditionally on no previous split, and the corresponding event probability, under the Beta-splitting model'''
    if (J==ceil(m/2)):
        I=1
        w=1
    else:
        mm = floor(m/2)
        if(betaSpl==0):
            #Kingman case
            D = sum(2/(m-1) for k in range(mm+1,J+1))
            if (m%2==0):
                D = D + 1/(m-1)
            p=(2/(m-1))/D
        else:
            D = 2*(sum(binomial(m,k)* DProduct(k,betaSpl)* DProduct(m-k,betaSpl) for k in range(mm+1,J+1)))
            if (m%2==0):
                D = D+  binomial(m,mm)* ((DProduct(mm,betaSpl))**2)
            p = 2*(binomial(m,J))*DProduct(J,betaSpl)*DProduct(m-J,betaSpl)/D
        U = np.random.random_sample()
        if (U<p):
            I=1
            w=p
        else:
            I=0
            w=1-p
    return [I,w]

def UncondSplit(betaSpl,m,J):
    '''compute the probability that m is split into J>=m/2 and m-J in Beta-splitting model without calling the beta functions'''
    if(betaSpl==0):
        #Kingman case
        if(2*J==m and m%2==0):
            p=1/(m-1)
        else:
            p=2/(m-1)
    else:
        mm= floor(m/2)
        D = 2*(sum(binomial(m,k)*DProduct(k,betaSpl)*DProduct(m-k,betaSpl) for k in range(mm+1,m)))
        if (m%2==0):
            aux2 = binomial(m,mm)* ((DProduct(mm,betaSpl))**2)
            D= D+ aux2
        N= binomial(m,J)* DProduct(J,betaSpl)* DProduct(m-J,betaSpl)
        if (2*J==m):
            p=N/D
        else:
            p=2*N/D
    return p

def IndexSplit(n,k,V):
    '''index of largest block if larger than k, according to n-vector V'''
    m=k-1
    for i in range(n-1,k-1,-1):
        if (V[i]>0):
            m=i
            break
    return m

def Sstep(betaSpl,n,j,C,F,w):
    '''column filling for j less than n/2 in Beta-splitting model, with control sequence C and proposal weight w. F[k,0] gives the size of the largest edge created by the split between epochs k-1 and k, F[k,n] gives the size of the edge split between epochs k-1 and k.'''
    J=n-j
    F[2,n]=n
    if (sum(F[2,J+1:n])==1):
        F[2,J]=0
    else:
        if (  (C[J]>0) or ( (j==floor(n/2)) and (n%2 == 1) )  ):
            F[2,J]=1
        else:
            A= BetaSplit(betaSpl,n,J)
            F[2,J]= A[0]
            w= w*(A[1])
    if F[2,J]>0:
        F[2,0]=J
        C[J]=0
    for k in range(3,j+2):
        if (sum(F[k,J+1:n])>0):
            F[k,J]=0
        else:
            m= IndexSplit(n,J,F[k-1,0:n+1])
            if (C[J]>0):
                F[k,J]=1
                F[k,n]=m
                C[J]=0
                if (F[k-1,J]==0):
                    F[k,0]=J
            else:
                if (m<J):
                    F[k,J]=0
                elif (m==J):
                    U=np.random.uniform()
                    q= (m-1)/(n-k+1)
                    if (U< q):
                        F[k,J]=0;
                        F[k,n]=J;
                        w= w*q
                    else:
                        F[k,J]=1
                        w=w*(1-q)
                elif (J==ceil(m/2)):
                    F[k,J]=1
                else:
                    F[k,n]=m
                    A=BetaSplit(betaSpl,m,J)
                    F[k,J]=A[0]
                    if (F[k,J]==1):
                        F[k,0]=J
                    w= w*(A[1])
    return [F,w]

def Hstep(betaSpl,n,C,F,w):
    '''column filling for j=n/2, n even and j>1'''
    j=floor(n/2)
    F[2,n]=n
    if (sum(F[2,j+1:n])==0):
        F[2,j]=2
        F[2,0]=j
        C[j]=0
    else:
        F[2,j]=0
    if (F[2,j]==0):
        for k in range(3,j+2):
            if (sum(F[k,j+1:n])>0):
                F[k,j]=0
            else:
                m= IndexSplit(n,j,F[k-1,0:n+1])
                if (C[j]>0):
                    F[k,j]=1
                    F[k,n]=m
                    C[j]=0
                    if (F[k-1,j]==0):
                        F[k,0]=j
                else:
                    if (m<j):
                        F[k,j]=0
                    elif (m==j):
                        U=np.random.uniform()
                        q= (m-1)/(n-k+1)
                        if (U<q):
                            F[k,j]=0
                            F[k,n]=j
                            w= w*q
                        else:
                            F[k,j]=1
                            w= w*(1-q)
                    elif (j==ceil(m/2)):
                        F[k,j]=1
                        F[k,0]=j
                    else:
                        F[k,n]=m
                        A = BetaSplit(betaSpl,m,j)
                        F[k,j]= A[0]
                        if (F[k,j]==1):
                            F[k,0]=j
                        w= w*(A[1])
    else:
        F[3,j]=1
        F[3,n]=j
        for k in range(4,j+2):
            m= IndexSplit(n,j,F[k-1,0:n+1])
            if (m<j):
                F[k,j]=0
            else:
                U=np.random.uniform()
                q= (m-1)/(n-k+1)
                if (U<q):
                    F[k,j]=0
                    F[k,n]=m
                    w= w*q
                else:
                    F[k,j]=1
                    w= w*(1-q)
    return [F,w]

def Lstep(betaSpl,n,j,C,F,w):
    '''column filling for j larger than n/2 in Beta-splitting model, with control sequence C'''
    J=n-j
    F[2,n]=n
    F[2,J]=F[2,j]
    if F[2,J]>0:
        C[J]=0
    for k in range(3,j+2):
        m=0
        for i in range(J+1,n):
            if F[k-1,i]>F[k,i]:
                m=i
                break
        if (m==0):
            if ((n-pylab.dot(pylab.arange(J,n,1),F[k-1,J:n]))-k+1+sum(F[k-1,J:n])==0):
                F[k,J]=F[k-1,J]-1
                F[k,n]=J
            elif ( (C[J]==0) or (sum(F[k,J+1:n])>0) or (F[k-1,J]>1) ):
                if (F[k-1,J]==0):
                    F[k,J]=0
                else:
                    U=np.random.uniform()
                    q= F[k-1,J]*(J-1)/(n-k+1-sum(F[k,l]*(l-1) for l in range(J+1,n)))
                    if (U< q):
                        F[k,J]=F[k-1,J]-1
                        F[k,n]=J
                        w= w*q
                    else:
                        F[k,J]=F[k-1,J]
                        w=w*(1-q)
            else:
                F[k,J]=F[k-1,J]
        elif (m > 2*J):
            F[k,n]=m
            F[k,J]=F[k-1,J]+(F[k,m-J]-F[k-1,m-J])
        elif ( (m==2*J) and (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])==0) ):
            F[k,n]=m
            F[k,J]=F[k-1,J]+2
            F[k,0]=J
        elif ( (m <= 2*J) and  (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])>0) ):
            F[k,n]=m
            F[k,J]=F[k-1,J]
        elif ( (J==ceil(m/2)) and (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])==0) ):
            F[k,n]=m
            F[k,J]=F[k-1,J]+1
            F[k,0]=J
        elif ( (m < 2*J) and (sum(F[k,J+1:m])-sum(F[k-1,J+1:m])==0) ):
            F[k,n]=m
            if ( (C[J]>0) and (F[k-1,J]==0) ):
                F[k,J]=1
                F[k,0]=J
            else:
                A= BetaSplit(betaSpl,m,J)
                F[k,J]=F[k-1,J] + A[0]
                if (A[0]==1):
                    F[k,0]=J
                w= w*(A[1])
    return [F,w]

def Twostep(betaSpl,n,C,F,w):
    '''column filling for j=n-2 in Beta-splitting model, with control sequence C useless here'''
    j=n-2
    F[2,n]=n
    F[2,2]=F[2,j]
    for k in range(3,j+2):
        m=0
        for i in range(3,n):
            if F[k-1,i]>F[k,i]:
                m=i
                break
        if (m==0):
            F[k,2]=F[k-1,2]-1
            F[k,n]=2
        elif (m > 4):
            F[k,n]=m
            F[k,2]=F[k-1,2]+F[k,m-2]-F[k-1,m-2]
        elif ( (m==4) and (F[k,3]-F[k-1,3]==0) ):
            F[k,n]=m
            F[k,2]=F[k-1,2]+2
            F[k,0]=2
        elif ( (m==4) and (F[k,3]-F[k-1,3]>0) ):
            F[k,n]=m
            F[k,2]=F[k-1,2]
        elif (m==3):
            F[k,n]=m
            F[k,2]=F[k-1,2]+1
            F[k,0]=2
    C[j]=0
    return [F,w]

def Onestep(betaSpl,n,C,F,w):
    '''column filling for j=n-1 in Beta-splitting model, with control sequence C useless here'''
    j=n-1
    F[2,n]=n
    F[2,1]=F[2,j]
    for k in range(3,j+1):
        m=0
        for i in range(3,n):
            if F[k-1,i]>F[k,i]:
                m=i
                break
        if (m>2):
            F[k,1]=F[k-1,1]+(F[k,m-1]-F[k-1,m-1])
            F[k,n]=m
        else:
            F[k,1]=F[k-1,1]+2
            F[k,0]=1
            F[k,n]=2
        F[n,1]=n
    F[n,n]=2
    F[n,0]=1
    C[j]=0
    return [F,w]


# Distributing the mutations

def GammaDensity(k,theta,x):
    '''Density of a Gamma(k,theta) at point x. theta inverse of the rate parameter in companion paper to comply with Python definition of gamma(a,b) distribution'''
    y= 1/(gamma(k)*(theta**k))
    z= y*(x**(k-1))*(exp(-x/theta))
    return z

def Mutate(A,n,j,s,theta,F,M,T,w1,w2):
    '''distribute the s mutations carried by n-j individuals using F and the information T on time already gathered. Updates at the same time the weight w1 associated with mutations and w2 associated with the times'''
    J=n-j
    if (s==0):
        M[0:n+1,J]= pylab.zeros(n+1)
    else:
        V=[(F[k,J]*T[k]) for k in range(0,n+1)]
        L= sum(V[2:j+2])
        W=[(V[k]/L) for k in range(0,n+1)]
        M[0:n+1,J]= np.random.multinomial(s,W)
        d = multinomial([int(i) for i in M[0:n+1,J]])
        dd = RR(d)
        for k in range(0,n+1):
            dd = dd * (RR(W[k])**(RR(M[k,J])))
        w1 = RR(w1)*dd
    for k in range (2,j+2):
        if F[k,J]>0:
            a=1+(sum(M[k,J:n]))
            b=1/(A[k]+theta*(sum(F[k,J:n])))
            T[k]= np.random.gamma( a, b )
            w2 = (w2) * (GammaDensity(RR(a),RR(b),RR(T[k])))
    return [M,T,w1,w2]


#Producing F and M from S (global algorithm)

def Control(n,S):
    '''build the control vector C from S, with sample size n'''
    C=pylab.zeros(n+1)
    for k in range(0,n+1):
        if (S[k]>0):
            C[k]=1
    return C

def BuildingFM(A,betaSpl,n,S,theta,F,M,T,w,w1,w2,k1,k2):
    '''modify topology F, mutation matrix M and epoch time vector T consistent with the SFS S, from column n-k1 to n-k2 in Beta-splitting model and with a priori rates A. Updates the weights w, w1 and w2 as well.'''
    C=Control(n,S)
    X=[F,w]
    Y=[M,T,w1,w2]
    if(n==2):
        X[0][2,0:3]=[1,2,2]
        C[1]=0
        Y=Mutate(A,n,1,S[1],theta,X[0],M,T,w1,w2)
    elif(n==3):
        if k1==1:
            X[0][2,0:4]=[2,1,1,3]
            C[2]=0
            Y=Mutate(A,n,1,S[2],theta,X[0],M,T,w1,w2)
        if k2==2:
            X[0][3,0:4]=[1,3,0,2]
            C[1]=0
            Y=Mutate(A,n,2,S[1],theta,X[0],Y[0],Y[1],Y[2],Y[3])
    else:
        K2=min(ceil(n/2)-1,k2)
        for j in range(k1,K2+1):
            X=Sstep(betaSpl,n,j,C,X[0],X[1])
            Y=Mutate(A,n,j,S[n-j],theta,X[0],Y[0],Y[1],Y[2],Y[3])
        if (n%2 == 0) and (k2 > K2) and (k1 <= floor(n/2)):
            j=floor(n/2)
            X=Hstep(betaSpl,n,C,X[0],X[1])
            Y=Mutate(A,n,j,S[j],theta,X[0],Y[0],Y[1],Y[2],Y[3])
        K1=max(k1,floor(n/2)+1)
        K2=min(k2,n-3)
        for j in range(K1,K2+1):
            X=Lstep(betaSpl,n,j,C,X[0],X[1])
            Y=Mutate(A,n,j,S[n-j],theta,X[0],Y[0],Y[1],Y[2],Y[3])
        if (k1 < n-1) and (k2 > n-3):
            X=Twostep(betaSpl,n,C,X[0],X[1])
            Y=Mutate(A,n,n-2,S[2],theta,X[0],Y[0],Y[1],Y[2],Y[3])
        if (k2 == n-1):
            X=Onestep(betaSpl,n,C,X[0],X[1])
            Y=Mutate(A,n,n-1,S[1],theta,X[0],Y[0],Y[1],Y[2],Y[3])
    return [X[0],Y[0],Y[1],X[1],Y[2],Y[3]]

def MakeHistory(A,betaSpl,n,S,theta):
    '''return SFS-history (F,M,T) compatible with SFS S, and proposal weights (w,w1,w2). n sampling size, A vector of a priori rates for epoch times, betaSpl a priori parameter of Beta-splitting model, theta scaled mutation rate'''
    M = pylab.zeros((n+1,n+1))
    F = pylab.zeros((n+1,n+1))
    T = pylab.zeros(n+1)
    for k in range(0,n+1):
        if (A[k]==0):
            T[k]=1
        else:
            T[k]=np.random.exponential(1/A[k])
    k1=1
    k2=n-1
    X=BuildingFM(A,betaSpl,n,S,theta,F,M,T,1,1,1,k1,k2)
    return X

def MakeHistory0(A,betaSpl,n,theta):
    '''return F and M-matrices independent of the mutation, and times sampled according to the a priori coalescence rates'''
    S = pylab.zeros(n+1)
    M = pylab.zeros((n+1,n+1))
    F = pylab.zeros((n+1,n+1))
    T = [1 for k in range(0,n+1)]
    k1=1
    k2=n-1
    X= BuildingFM(A,betaSpl,n,S,theta,F,M,T,1,1,1,k1,k2)
    MM = pylab.zeros((n+1,n+1))
    TT = pylab.zeros(n+1)
    for k in range(0,n+1):
        if (A[k]==0):
            TT[k]=1
        else:
            TT[k]=np.random.exponential(1/A[k])
    return [X[0],MM,TT,X[3],1.,ProbOfTHom(A,TT,n)]



Simulating Data

%auto
#Making SFS from F and T

def MakeSFS(F,T,theta,n):
    '''create SFS out of L=TF and theta = per locus scaled mutation rate'''
    S=pylab.zeros(n+1)
    L=pylab.dot(T,F)
    for i in range(1,n):
        S[i]=np.random.poisson(theta*L[i])
    return S


#Producing epoch times

def KingmanRates(n):
    '''return Kingman rates'''
    return pylab.array([0.,0.]+[k*(k-1)/2.0 for k in range(2,n+1)])

def EpochtimeConst(R,N0):
    '''vector of exponential times with rates given in vector R and constant pop size N0'''
    return [np.random.exponential(N0/r) for r in R]

def EpochtimeExpGr(Rk,N0,g,s):
    '''return a sample of time Tk of epoch k with sample-size dependent rate Rk,
       starting from time s, in an exponentially growing population with growth rate g from
       initial pop size N0'''
    if g==0:
        return np.random.exponential(N0/Rk)
    else:
        return (1./g)*log(1.0 - (exp(-g*s)*N0*g*(log(np.random.uniform())))/Rk)

def EpochTimesExpGr(n,R,N0,g):
    '''return a sample vector of inhomogenous exponential RVs, with sample-size dependent
       epoch-specific coalescent rates given by vector R in an exponentially growing population
       with growth rate g from initial pop size N0'''
    t=pylab.zeros(n+1)
    t[n]=EpochtimeExpGr(R[n],N0,g,0)
    for k in range(n-1,1,-1):
        t[k]=EpochtimeExpGr(R[k],N0,g,t[n:k+1:-1].sum())
    return t

def EpochtimeBtl(k,N0,a,b,eps,s):
    '''return a sample of time Tk of epoch k starting from time s, under Kingman coalescent with pop size eps times N0 between times a and b, and N0 otherwise'''
    if (s>=b):
        return np.random.exponential(2*N0/(k*(k-1)))
    else:
        U= np.random.uniform()
        if (s>a):
            if ( U < 1- exp(-k*(k-1)*(b-s)/( 2*eps*N0 )) ):
                return -2*eps*N0*log(1-U)/( k*(k-1) )
            else:
                return -2*N0*log(1-U)/( k*(k-1) ) - (b-s)*(1/eps - 1)
        else:
            if ( U < 1- exp(-k*(k-1)*(a-s)/(2*N0)) ):
                return -2*N0*log(1-U)/(k*(k-1))
            elif ( U < 1- exp(- k*(k-1)*( (b-a)/eps + a-s )/(2*N0))):
                return eps*(-2*N0*log(1-U)/(k*(k-1)) + (a-s)*(1/eps - 1) )
            else:
                return -2*N0*log(1-U)/(k*(k-1)) - (b-a)*(1/eps - 1)

def EpochTimesBtl(n,N0,a,b,eps):
    '''return a sample vector of inhomogenous exponential RVs, under Kingman coalescent with pop size eps times N0 between times a and b, and N0 otherwise. n is sample size'''
    t=pylab.zeros(n+1)
    t[n]=EpochtimeBtl(n,N0,a,b,eps,0)
    for k in range(n-1,1,-1):
        t[k]=EpochtimeBtl(k,N0,a,b,eps,t[n:k+1:-1].sum())
    return t


#Creating SFS and associated pair (F,T) in classical scenarios

def MakeS(betaSpl,N0,g,mu,n):
    '''make SFS data S from given parameters and return T,F,S for exponential growth model of demography (g=0 is standard Kingman coalescent)'''
    T=EpochTimesExpGr(n,KingmanRates(n),N0,g)
    A = [1 for k in range(0,n+1)] # this is just dummy
    X = MakeHistory0(A,betaSpl,n,mu)
    S=MakeSFS(X[0],T,mu,n)
    return [T,X[0],S]

def MakeS_Btl(betaSpl,N0,a,b,eps,mu,n):
    '''make sfs data S from given parameters and return T,F,S for bottleneck model of demography'''
    T=EpochTimesBtl(n,N0,a,b,eps)
    A = [1 for k in range(0,n+1)] # this is just dummy
    X = MakeHistory0(A,betaSpl,n,mu)
    S=MakeSFS(X[0],T,mu,n)
    return [T,X[0],S]



Computing Probabilities and Likelihoods

%auto
# Probability of a Topology in Beta-splitting model

def ProbOfF(betaSpl,f,n):
    '''compute probability of topology F in (unconditioned) Beta-splitting model, given beta and sample size n'''
    f[1,n]=1
    pr=RR(1.0)
    for k in range(2,n+1):
        mk=floor(f[k,n])
        lk=floor(f[k,0])
        pr=pr*(f[k-1,mk]*(mk-1)/(n-k+1))*UncondSplit(betaSpl,mk,lk)
    return pr


# Densities of rate-parametrized epoch times

def ProbOfTHom(r,t,n):
    '''return the density of the time vector t=[t_2,...t_n] under coalescent model with epoch-dependent rates r(k)'''
    a2 = RR(1.0)
    for i in range(2,n+1):
        a2 = a2 * RR(r[i]) * exp(-(RR(t[i]) * RR(r[i])))
    return a2

def ProbOfTExG(g,t,n):
    '''return the density of the time vector t=[t_2,...t_n] under Kingman coalescent with exponentially growing population with growth rate g'''
    if(g==0):
        a=RR(1.0)
        for k in range(2,n+1,1):
            kC2=RR(k*(k-1)/2);
            a = a * kC2 * exp(-kC2*t[k])
    else:
        Tnk = RR(t[n]);
        a = RR(n*(n-1)/2 * exp(g*Tnk - n*(n-1)/(2*g) *  1 * (exp(g*t[n])-1) ) );
        for k in range(n-1,1,-1):
            kC2=RR(k*(k-1)/2);
            a = a * kC2 * exp(g*(Tnk+t[k]) - ( kC2/g * exp(g*(Tnk)) * (exp(g*t[k])-1) ) );
            Tnk=Tnk+t[k];
    return a

def ProbOfTBtl(a,b,eps,N0,t,n):
    '''return the density of the time vector t=[t_2,...,t_n] under the Kingman with bottleneck scenario where pop size is eps*N0 between times a and b, and N0 otherwise'''
    if (t[n]>= b):
        bin= RR(n*(n-1)/(2*N0))
        x= RR(bin*exp( - bin*((b-a)*(1/eps -1)+t[n]) ))
        for k in range(2,n):
            bink= RR(k*(k-1)/(2*N0))
            x = x* bink * exp(-bink*t[k])
    else:
        bart=pylab.zeros(n+2)
        bart[n]=t[n]
        for k in range(n-1,1,-1):
            bart[k]= bart[k+1]+t[k]
        K=2
        KK=2
        for k in range(n,1,-1):
            if (bart[k]>a):
                K=k+1
                break
        for k in range(K-1,1,-1):
            if (bart[k]>b):
                KK=k+1
                break
        if (K==KK) and (K>2):
            bin = RR((K-1)*(K-2)/(2*N0))
            x = RR(bin*exp( -bin*( (b-a)*(1/eps -1)+ t[K-1] ) ))
            for j in range(2,K-1):
                binj= j*(j-1)/(2*N0)
                x= x * RR(binj * exp(- binj * t[j]))
            for j in range(K,n+1):
                binj= j*(j-1)/(2*N0)
                x= x * RR(binj * exp(- binj*t[j]))
        elif (K==KK) and (K==2):
            x = ProbOfTHom(KingmanRates(n)/N0,t,n)
        elif (KK>2):
            bin1 = (K-1)*(K-2)/(2*N0)
            bin2 = (KK-1)*(KK-2)/(2*N0)
            x = RR((bin1/eps)*(bin2)*exp( -bin1*(a-bart[K] + (bart[K-1]-a)/eps) -bin2*( (b-bart[KK])/eps + bart[KK-1]-b ) ))
            for j in range(2,KK-1):
                binj= j*(j-1)/(2*N0)
                x= x* RR( binj*exp(-binj*t[j]) )
            for j in range(KK,K-1):
                binj= j*(j-1)/(2*eps*N0)
                x= x* RR( binj*exp(-binj*t[j]) )
            for j in range(K,n+1):
                binj= j*(j-1)/(2*N0)
                x= x* RR( binj*exp(-binj*t[j]) )
        else:
            bin1 = (K-1)*(K-2)/(2*N0)
            x = RR((bin1/eps)*exp( -bin1*(a-bart[K] + (bart[K-1]-a)/eps) ))
            for j in range(2,K-1):
                binj= j*(j-1)/(2*eps*N0)
                x= x* RR( binj*exp(-binj*t[j]) )
            for j in range(K,n+1):
                binj= j*(j-1)/(2*N0)
                x= x* RR( binj*exp(-binj*t[j]) )
    return x


# Conditional probabilities

def ProbOfMGivenFTmu(f,m,t,mu,n):
    '''return the P(M | F,T,mu), mu scaled mutation rate (theta earlier)'''
    a=RR(1.0)
    for j in range(1,n):
        for k in range(2,n+1):
            r=mu*RR(f[k,j])*RR(t[k])
            a=a*exp(-r)*r^(RR(m[k,j]))/factorial(RR(m[k,j]))
    return a

def ProbOfSfsGivenFTmu(f,s,t,mu,n):
    '''return the P(S | F,T,mu) of SFS S given topology F and epoch times T, mu scaled mutation rate'''
    a=RR(1.0)
    L=pylab.dot(t,f)
    for j in range(1,n):
        r=mu*RR(L[j])
        a=a*exp(-r)*r^(RR(s[j]))/factorial(RR(s[j]))
    return a

def ProbOfFMTGivenRatesBetaMu(f,m,t,r,betaSpl,mu,n):
    '''return P(F,M,T) given rates, beta, and mu in Beta-splitting model with epoch-wise homogeneous coalescence rates given by vector r'''
    return ProbOfF(betaSpl,f,n)*ProbOfTHom(r,t,n)*ProbOfMGivenFTmu(f,m,t,mu,n)

def ProbOfFMTGivenGrowthBetaMu(f,m,t,g,betaSpl,mu,n):
    '''return P(F,M,T) given exponential growth rate, beta, and mu in Beta-splitting model with exponential growth of pop size'''
    return ProbOfF(betaSpl,f,n)*ProbOfTExG(g,t,n)*ProbOfMGivenFTmu(f,m,t,mu,n)

def ProbOfFSfsTGivenGrowthBetaMu(f,s,t,g,betaSpl,mu,n):
    '''return P(F,S,T) given exponential growth rate, betaSpl, and mu'''
    return ProbOfF(betaSpl,f,n)*ProbOfTExG(g,t,n)*ProbOfSfsGivenFTmu(f,s,t,mu,n)

def ProbOfFSfsTGivenBottleneckBetaMu(f,s,t,a,b,eps,N0,betaSpl,mu,n):
    '''return P(F,S,T) given bottleneck model parameters: a,b,eps,N0, and betaSpl and mu'''
    return ProbOfF(betaSpl,f,n)*ProbOfTBtl(a,b,eps,N0,t,n)*ProbOfSfsGivenFTmu(f,s,t,mu,n)

# Multi-locus based likelihood of parameters under different scenarios

def LklOfBetaThetaGrowthRates(betaSpl,Mu,g,GrRates,n,sfsList,Reps):
    '''return the log likelihood across loci for beta, Mu and growth rate g, specifying a vector GrRates of a priori coalescence rates. sfsList is a list of SFS at all loci considered, Reps is the number of particles produced per locus to evaluate the per-locus likelihood.'''
    jointLklAcrossLoci=RR(1.0)
    jointLogLklAcrossLoci=RR(0.0)
    for i in range(len(sfsList)):
        sfs=sfsList[i]
        Lkl2=pylab.zeros(Reps)
        for r in range(Reps):
            Y=MakeHistory(GrRates,betaSpl,n,sfs,Mu)
            wt=Y[3]*Y[4]*Y[5]
            numwt2=ProbOfFSfsTGivenGrowthBetaMu(Y[0],sfs,Y[2],g,betaSpl,Mu,n)
            Lkl2[r]=numwt2/wt
        Lkl2Mean=Lkl2.mean()
        jointLklAcrossLoci=jointLklAcrossLoci*Lkl2Mean
        jointLogLklAcrossLoci=jointLogLklAcrossLoci+log(Lkl2Mean)
    print 'beta, mu, g, lkl, logLkl = ',betaSpl,' ,',Mu,' ,',g,' ,',jointLklAcrossLoci,' ,',jointLogLklAcrossLoci
    return jointLogLklAcrossLoci

def LklOfBetaThetaGrowth(betaSpl,Mu,g,n,sfsList,Reps):
    '''return the log likelihood estimates for beta parameter betaSpl, mutation rate Mu and growth rate g from list of observed S, by first computing a priori mean coalescence rates and then produce Reps particles per locus.'''
    RateSamples=1000  # you may need to increase this for larger n!
    GrRates = GetRatesForGrowth(g,n,RateSamples)
    jointLogLklAcrossLoci = LklOfBetaThetaGrowthRates(betaSpl,Mu,g,GrRates,n,sfsList,Reps)
    return jointLogLklAcrossLoci

def LklOfBetaThetaBottleneckRates(betaSpl,Mu,a,b,eps,N0,btlnckRates,n,sfsList,Reps):
    '''return the lok likelihood across loci for beta, Mu, a, b, eps and N0, specifying a vector btlnckRates of a priori coalescence rates. sfsList is a list of SFS at all loci considered, Reps is the number of particles produced per locus to evaluate the per-locus likelihood.'''
    jointLogLklAcrossLoci=RR(0.0)
    for i in range(len(sfsList)):
        sfs=sfsList[i]
        Lkl2=pylab.zeros(Reps)
        for r in range(Reps):
            Y=MakeHistory(btlnckRates,betaSpl,n,sfs,Mu)
            wt=Y[3]*Y[4]*Y[5]
            numwt2=ProbOfFSfsTGivenBottleneckBetaMu(Y[0],sfs,Y[2],a,b,eps,N0,betaSpl,Mu,n)
            Lkl2[r]=numwt2/wt
        Lkl2Mean=Lkl2.mean()
        jointLogLklAcrossLoci=jointLogLklAcrossLoci+log(Lkl2Mean)
    print 'beta, mu, a, b, eps, N0, logLkl = ',betaSpl,' ,',Mu,' ,',a,' ,',b,' ,',eps,' ,',N0,' ,',jointLogLklAcrossLoci
    return jointLogLklAcrossLoci

def LklOfBetaThetaBottleneck(betaSpl,Mu,a,b,eps,N0,n,sfsList,Reps):
    '''return the log likelihood estimates for beta, Mu and bottleneck parameters a,b,eps,N0 for observed SFS in sfsList, by first computing a priori mean coalescence rates in this scenario. Reps is number of particles produced for each SFS'''
    RateSamples=1000 # you may need to increase this for larger n!
    BtlRates = GetRatesForBottleneck(a,b,eps,N0,n,RateSamples)
    jointLogLklAcrossLoci = LklOfBetaThetaBottleneckRates(betaSpl,Mu,a,b,eps,N0,BtlRates,n,sfsList,Reps)
    return jointLogLklAcrossLoci


###############################################################################
# This Halton sequence code is by Richard P. Muller ([email protected]).
# http://pistol.googlecode.com/svn/trunk/Pistol/QuasiRandom.py
prime = [2.0,3.0,5.0,7.0,11.0,13.0,17.0,19.0,23.0,29.0,31.0,37.0,
         41.0,43.0,47.0,53.0,59.0,61.0,67.0,71.0,73.0,79.0,83.0,
         89.0,97.0,101.0,103.0,107.0,109.0,113.0,127.0,131.0,137.0,
         139.0,149.0,151.0,157.0,163.0,167.0,173.0]
class HaltonSequence:
    """Another algorithm to compute a d-dimensional Halton Sequence.
    This one gives the same results as the other."""
    def __init__(self,dim,atmost=10000):
        if dim > len(prime): raise "dim < %d " % len(prime)
        self.dim = dim

        self.err = 0.9/(atmost*prime[dim-1])

        self.prime = [0]*self.dim
        self.quasi = [0]*self.dim
        for i in range(self.dim):
            self.prime[i] = 1./prime[i]
            self.quasi[i] = self.prime[i]
        return

    def __call__(self):
        for i in range(self.dim):
            t = self.prime[i]
            f = 1.-self.quasi[i]
            g = 1.
            h = t
            while f-h < self.err:
                g = h
                h = h*t
            self.quasi[i] = g+h-f
        return self.quasi
# END of Halton sequence code by Richard P. Muller ([email protected]).
###############################################################################

Computing useful statistics

%auto
def WattersonsThetaEst(s,n):
    '''return Watterson's estimate of mutation rate based on number of segregating sites s and average length of Kingman's coalescent with sample size n'''
    est=0
    for i in range(1,n):
        est=est+(2/i)
    return s/est

def GetRatesForGrowth(g,n,RateSamples):
    '''return the mean estimated rates for each epoch under growth g and sample size n, based on RateSamples samplings from the law'''
    A=KingmanRates(n)
    B=pylab.zeros(n+1)

    for j in range(RateSamples):
        B=B+EpochTimesExpGr(n,A,1,g)
    for j in range(2,n+1):
        B[j]= RateSamples/B[j]
    return B

def GetRatesForBottleneck(a,b,eps,N0,n,RateSamples):
    '''return the mean estimated rates for each epoch under Kingman's coalescent with bottleneck model with start and end times a<b, with initial population size N0 and population reduction eps, based on RateSamples samplings from the law'''
    B=pylab.zeros(n+1)
    for j in range(RateSamples):
        B=B+EpochTimesBtl(n,N0,a,b,eps)
    for j in range(2,n+1):
        B[j]= RateSamples/B[j]
    return B


Producing data to test the algorithms under the exponential growth model

# dataset 1G - KINGMAN'S COALESCENT WITH NO GROWTH

# Produce a list of NumLoci2 SFS from Kingman's coalescent with growth
set_random_seed(12456546546)
np.random.seed(int(78786767))
n=15
N0=1 # this is fixed at unity as in Hudson's ms
TrueBeta=0
TrueG=0.0
TrueTheta=10
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)
# mean number of segregating sites across loci
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites
70.05

# Likelihood based on the full list of SFS
set_random_seed(12456546546)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci2)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,1,10];
ThetaList=[1,10,100];
results=[]
Reps=100; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci2,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 0.000000000000000 , 0.0 , -7667.7511712 -7667.7511712 beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 0.000000000000000 , 0.0 , -5226.21481293 -5226.21481293 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 0.000000000000000 , 0.0 , -10086.1480954 -10086.1480954 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 0.0 , -7599.85131455 -7599.85131455 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -5110.13049221 -5110.13049221 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -10196.7979451 -10196.7979451 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 0.000000000000000 , 0.0 , -7643.79941623 -7643.79941623 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 0.000000000000000 , 0.0 , -5324.05597448 -5324.05597448 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 0.000000000000000 , 0.0 , -10321.0669668 -10321.0669668 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 1 , 0.0 , -5909.20719092 -5909.20719092 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 1 , 0.0 , -10086.4618185 -10086.4618185 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 1 , 0.0 , -5786.1424593 -5786.1424593 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 1 , 0.0 , -10154.3873678 -10154.3873678 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 1 , 0.0 , -5853.30748592 -5853.30748592 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 1 , 0.0 , -10255.5327358 -10255.5327358 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 10 , 0.0 , -9975.7902135 -9975.7902135 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10 , 0.0 , -10014.8543315 -10014.8543315 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 10 , 0.0 , -10230.7874703 -10230.7874703 [Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5110.1304922098898]
# Likelihood based on the partial list of SFS
set_random_seed(12456546546)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,1,10];
ThetaList=[1,10,100];
results=[]
Reps=100; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 0.000000000000000 , 0.0 , -2337.02890526 -2337.02890526 beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 0.000000000000000 , 0.0 , -1576.56551326 -1576.56551326 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 0.000000000000000 , 0.0 , -3065.09464016 -3065.09464016 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 0.0 , -2345.4279983 -2345.4279983 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -1569.62519957 -1569.62519957 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -3031.26008774 -3031.26008774 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 0.000000000000000 , 0.0 , -2340.03771926 -2340.03771926 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 0.000000000000000 , 0.0 , -1581.81045985 -1581.81045985 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 0.000000000000000 , 0.0 , -3145.99935957 -3145.99935957 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 1 , 0.0 , -1817.21117217 -1817.21117217 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 1 , 0.0 , -3047.49606705 -3047.49606705 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 1 , 0.0 , -1732.80522191 -1732.80522191 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 1 , 0.0 , -3036.44416147 -3036.44416147 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 1 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 1 , 0.0 , -1812.15345891 -1812.15345891 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 1 , 0.0 , -3157.4826845 -3157.4826845 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 10 , 0.0 , -3031.21732179 -3031.21732179 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10 , 0.0 , -3019.13628679 -3019.13628679 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 10 , 0.0 , -inf -inf beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 10 , 0.0 , -3108.13197622 -3108.13197622 [Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [100, 15, 30, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -1569.6251995691184]

# dataset 2G - KINGMAN'S COALESCENT WITH GROWTH - SAMPLE SIZE n=2

set_random_seed(12456546)
np.random.seed(int(787867))
n=2
N0=1
TrueBeta=0
TrueG=10.0
TrueTheta=10
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites across loci
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites
3.82
beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 1.84642497925e-38 , -86.8849822079 -86.8849822079 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 3.27549135261e-43 , -97.8246911096 -97.8246911096 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 4.13081400383e-70 , -159.762482027 -159.762482027 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 1 , 7.19903227912e-51 , -115.457893131 -115.457893131 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 1 , 1.45992857523e-40 , -91.7250162063 -91.7250162063 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 1 , 9.10762500924e-70 , -158.971844534 -158.971844534 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10 , 2.04450578169e-187 , -429.868256301 -429.868256301 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10 , 7.32408674123e-32 , -71.6915545056 -71.6915545056 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10 , 1.32896401033e-66 , -151.686216439 -151.686216439 [Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [1000, 2, 30, 0, 10.0000000000000, 10, 0.000000000000000, 10, 10, -71.691554505644334]
# dataset 3G - UNBALANCED TREE WITH GROWTH

set_random_seed(124566)
np.random.seed(int(787607))
n=10
N0=1
TrueBeta=-1.9
TrueG=10.0
TrueTheta=10
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
    tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
    TFS.append(tfs)

# mean number of segregating sites across loci
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites
14.63
set_random_seed(1245655)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)]
BetaList=[-1.9,0.0,10.0];
GrowthList=[0.0,1,10];
ThetaList=[1,10,100];
results=[]
Reps=500; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
beta, mu, g, lkl, logLkl = -1.90000000000000 , 1 , 0.000000000000000 , 8.92612469103e-153 , -350.106536893 -350.106536893 beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 0.000000000000000 , 3.72263396154e-213 , -489.136193336 -489.136193336 beta, mu, g, lkl, logLkl = -1.90000000000000 , 100 , 0.000000000000000 , 0.0 , -1373.7491213 -1373.7491213 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 1.04931918813e-194 , -446.653366479 -446.653366479 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 1.41195612133e-283 , -651.286605254 -651.286605254 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -1618.3152301 -1618.3152301 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 0.000000000000000 , 1.44084065211e-221 , -508.506078822 -508.506078822 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 0.000000000000000 , 0.0 , -746.607157891 -746.607157891 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 0.000000000000000 , 0.0 , -1772.76619681 -1772.76619681 beta, mu, g, lkl, logLkl = -1.90000000000000 , 1 , 1 , 7.46222329377e-232 , -532.189888176 -532.189888176 beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 1 , 1.28135329712e-208 , -478.68978256 -478.68978256 beta, mu, g, lkl, logLkl = -1.90000000000000 , 100 , 1 , 0.0 , -1371.19351795 -1371.19351795 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 1 , 2.60833233832e-272 , -625.344434228 -625.344434228 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 1 , 8.00576248154e-286 , -656.459175004 -656.459175004 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 1 , 0.0 , -1620.4503413 -1620.4503413 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 1 , 1.00165324615e-308 , -709.194556761 -709.194556761 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 1 , 0.0 , -749.966447065 -749.966447065 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 1 , 0.0 , -1783.23476919 -1783.23476919 beta, mu, g, lkl, logLkl = -1.90000000000000 , 1 , 10 , 0.0 , -1618.83362947 -1618.83362947 beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 10 , 1.02978520119e-228 , -524.960050965 -524.960050965 beta, mu, g, lkl, logLkl = -1.90000000000000 , 100 , 10 , 0.0 , -1346.84482594 -1346.84482594 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10 , 0.0 , -1740.11270886 -1740.11270886 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10 , 3.00817303193e-318 , -731.120727431 -731.120727431 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10 , 0.0 , -1606.96795791 -1606.96795791 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 10 , 0.0 , -1739.04369827 -1739.04369827 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 10 , 0.0 , -826.161229329 -826.161229329 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 10 , 0.0 , -1755.56846088 -1755.56846088 [Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [500, 10, 30, -1.90000000000000, 10.0000000000000, 10, -1.90000000000000, 1, 0.000000000000000, -350.10653689256128]
# dataset 4G - MORE BALANCED TREE WITH NO GROWTH

set_random_seed(12456625)
np.random.seed(int(78764507))
n=10
N0=1
TrueBeta=10
TrueG=0.0
TrueTheta=10
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
    tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
    TFS.append(tfs)

# mean number of segregating sites across loci
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites
50.86
set_random_seed(1245655)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)]
BetaList=[-1.9,0.0,10,20,50];
GrowthList=[0.0];
ThetaList=[1,10,100];
results=[]
Reps=500; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
beta, mu, g, lkl, logLkl = -1.90000000000000 , 1 , 0.000000000000000 , 0.0 , -1154.54179629 -1154.54179629 beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 0.000000000000000 , 0.0 , -780.297563836 -780.297563836 beta, mu, g, lkl, logLkl = -1.90000000000000 , 100 , 0.000000000000000 , 0.0 , -1767.67463989 -1767.67463989 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 0.0 , -951.140203765 -951.140203765 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 1.8497040141e-244 , -561.215737057 -561.215737057 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -1530.84525417 -1530.84525417 beta, mu, g, lkl, logLkl = 10 , 1 , 0.000000000000000 , 0.0 , -899.806101283 -899.806101283 beta, mu, g, lkl, logLkl = 10 , 10 , 0.000000000000000 , 2.09476181715e-241 , -554.183567556 -554.183567556 beta, mu, g, lkl, logLkl = 10 , 100 , 0.000000000000000 , 0.0 , -1511.60217057 -1511.60217057 beta, mu, g, lkl, logLkl = 20 , 1 , 0.000000000000000 , 0.0 , -883.534377758 -883.534377758 beta, mu, g, lkl, logLkl = 20 , 10 , 0.000000000000000 , 3.78782789921e-241 , -553.59121467 -553.59121467 beta, mu, g, lkl, logLkl = 20 , 100 , 0.000000000000000 , 0.0 , -1535.52126976 -1535.52126976 beta, mu, g, lkl, logLkl = 50 , 1 , 0.000000000000000 , 0.0 , -898.52707297 -898.52707297 beta, mu, g, lkl, logLkl = 50 , 10 , 0.000000000000000 , 2.07326978324e-237 , -544.983540073 -544.983540073 beta, mu, g, lkl, logLkl = 50 , 100 , 0.000000000000000 , 0.0 , -1507.09522843 -1507.09522843 [Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [500, 10, 30, 10, 0.000000000000000, 10, 50, 10, 0.000000000000000, -544.98354007314447]
# dataset 4Gbis - SAME WITH LARGER BETA

set_random_seed(12456625)
np.random.seed(int(78764507))
n=10
N0=1
TrueBeta=50
TrueG=0.0
TrueTheta=10
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
    tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
    TFS.append(tfs)

# mean number of segregating sites across loci
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites
54.55
set_random_seed(1245655)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)]
BetaList=[-1.9,0.0,20,50,70,100];
GrowthList=[0.0];
ThetaList=[10];
results=[]
Reps=500; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci1,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 0.000000000000000 , 0.0 , -779.448068357 -779.448068357 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 1.15508278542e-247 , -568.594345952 -568.594345952 beta, mu, g, lkl, logLkl = 20 , 10 , 0.000000000000000 , 1.44022837574e-232 , -533.834939879 -533.834939879 beta, mu, g, lkl, logLkl = 50 , 10 , 0.000000000000000 , 4.92456037301e-226 , -518.78999601 -518.78999601 beta, mu, g, lkl, logLkl = 70 , 10 , 0.000000000000000 , 9.84020154202e-222 , -508.887414452 -508.887414452 beta, mu, g, lkl, logLkl = 100 , 10 , 0.000000000000000 , 5.46929618175e-225 , -516.382495984 -516.382495984 [Reps, n, NumLoci1, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [500, 10, 30, 50, 0.000000000000000, 10, 70, 10, 0.000000000000000, -508.88741445191135]
# same computations, with more loci (100 instead of 30)
set_random_seed(1245655)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci2)]
BetaList=[-1.9,0.0,20,50,70,100];
GrowthList=[0.0];
ThetaList=[10];
results=[]
Reps=500; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            print logLAcrossAllLoci
            results.append([Reps,n,NumLoci2,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

beta, mu, g, lkl, logLkl = -1.90000000000000 , 10 , 0.000000000000000 , 0.0 , -2678.13043644 -2678.13043644 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -1906.68164481 -1906.68164481 beta, mu, g, lkl, logLkl = 20 , 10 , 0.000000000000000 , 0.0 , -1771.44766253 -1771.44766253 beta, mu, g, lkl, logLkl = 50 , 10 , 0.000000000000000 , 0.0 , -1771.74216851 -1771.74216851 beta, mu, g, lkl, logLkl = 70 , 10 , 0.000000000000000 , 0.0 , -1759.10585603 -1759.10585603 beta, mu, g, lkl, logLkl = 100 , 10 , 0.000000000000000 , 0.0 , -1798.32218429 -1798.32218429 [Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [500, 10, 100, 50, 0.000000000000000, 10, 70, 10, 0.000000000000000, -1759.1058560291142]
# dataset 5G - LESS EXTREME DEPARTURES FROM Beta=0
set_random_seed(12456546546)
np.random.seed(int(78786767))
n=15
N0=1
TrueBeta=-1
TrueG=0.0
TrueTheta=10
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)

# infer
set_random_seed(12456546546)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,10.0,100.0];
ThetaList=[1,10,100];
results=[]
Reps=20; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])

# report the most likely values
print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 0.000000000000000 , 0.0 , -2485.44180413698 beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 0.000000000000000 , 0.0 , -1595.65973228989 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 0.000000000000000 , 0.0 , -3009.74146339138 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 0.0 , -2497.49538365434 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -1629.09844522608 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -3054.63957052542 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 0.000000000000000 , 0.0 , -2524.46093727679 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 0.000000000000000 , 0.0 , -1721.68556524245 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 0.000000000000000 , 0.0 , -3233.84204749802 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 10.0000000000000 , 0.0 , -3029.81399220508 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10.0000000000000 , 0.0 , -3063.60307631410 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 10.0000000000000 , 0.0 , -3196.15624867809 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 100.000000000000 , 0.0 , -infinity [Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [20, 15, 30, -1, 0.000000000000000, 10, -1.00000000000000, 10, 0.000000000000000, -1595.65973228989]

# dataset 6G - SAME WITH MORE BALANCED TREES

set_random_seed(12456546546)
np.random.seed(int(78786767))
n=15
N0=1
TrueBeta=10.0
TrueG=0.0
TrueTheta=10
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)

# infer
set_random_seed(12456546546)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,10.0,100.0];
ThetaList=[1,10,100];
results=[]
Reps=20; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])

# report the most likely values
print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 0.000000000000000 , 0.0 , -2426.00860033181 beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 0.000000000000000 , 0.0 , -1807.30350551266 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 0.000000000000000 , 0.0 , -3353.27443791154 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 0.000000000000000 , 0.0 , -2386.08740026739 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -1764.68461686015 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -3296.49162836633 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 0.000000000000000 , 0.0 , -2318.48151263816 beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 0.000000000000000 , 0.0 , -1651.92027054266 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 0.000000000000000 , 0.0 , -3295.30336629113 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 10.0000000000000 , 0.0 , -3334.17111890078 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 10.0000000000000 , 0.0 , -3336.53844782303 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 10.0000000000000 , 0.0 , -3275.05019894113 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 1 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10 , 100.000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100 , 100.000000000000 , 0.0 , -infinity [Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [20, 15, 30, 10.0000000000000, 0.000000000000000, 10, 10.0000000000000, 10, 0.000000000000000, -1651.92027054266]
# dataset 6G' - SAME WITH slightly MORE BALANCED TREES but with large theta
# this is to check against the best fitted beta and theta in the beta-splitting model fitted against the complex ms simulation from msdoc below

set_random_seed(12456546546)
np.random.seed(int(78786767))
n=15
N0=1
TrueBeta=5.0
TrueG=0.0
TrueTheta=100
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)

# infer
set_random_seed(12456546546)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[-1.0,0.0,5.0];
GrowthList=[0.0];
ThetaList=[10,100,1000];
results=[]
Reps=20; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])

# report the most likely values
print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
beta, mu, g, lkl, logLkl = -1.00000000000000 , 10 , 0.000000000000000 , 0.0 , -5347.9254917 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100 , 0.000000000000000 , 0.0 , -4181.90160246 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000 , 0.000000000000000 , 0.0 , -7318.4541281 beta, mu, g, lkl, logLkl = 0.000000000000000 , 10 , 0.000000000000000 , 0.0 , -4506.96656944 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100 , 0.000000000000000 , 0.0 , -3629.03506764 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000 , 0.000000000000000 , 0.0 , -6476.40522229 beta, mu, g, lkl, logLkl = 5.00000000000000 , 10 , 0.000000000000000 , 0.0 , -4211.11864562 beta, mu, g, lkl, logLkl = 5.00000000000000 , 100 , 0.000000000000000 , 0.0 , -3316.56095862 beta, mu, g, lkl, logLkl = 5.00000000000000 , 1000 , 0.000000000000000 , 0.0 , -5760.49369151 [Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [20, 15, 30, 5.00000000000000, 0.000000000000000, 100, 5.00000000000000, 100, 0.000000000000000, -3316.5609586219052]
Sanity check: increased mutation rate (i.e., more SNPs) gives better estimates
# dataset 7G with g=2 and increased theta
set_random_seed(12533)
np.random.seed(np.int(1245))
n=15
N0=1
TrueBeta=0.0
TrueG=2.0
TrueTheta=100.0
NumLoci=60

TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)
 #tfs[2]
# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites
364.35
set_random_seed(12451116)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,1.0,2.0,3.0,5.0,10.0];
ThetaList=[100.0];
results=[]
Reps=200; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -4907.78167671551 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -4770.02044249150 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -4733.57552978068 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -4858.67216675298 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -4758.11451797914 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -4754.97193836299 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -4938.48773788207 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -4692.51974709802 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -4719.45055633706 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -5014.20031830045 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -4846.31369921827 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -4836.91856516081 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -5949.63834428161 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -5767.59346562225 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -5728.97568897941 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -13897.3116586207 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -13681.8415389646 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -infinity

# report the most likely values
print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [200, 15, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 2.00000000000000, -4692.51974709802]
# dataset 8G - with increased n
set_random_seed(122323533)
np.random.seed(np.int(1211145))
n=30
N0=1
TrueBeta=0.0
TrueG=2.0
TrueTheta=100.0
NumLoci=60

TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)
 #tfs[2]
# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites
462.516666667
set_random_seed(12451116) # we can find signal of growth and beta over theta (gHat=1.0 instead of trueG=2.0)
np.random.seed(int(1234))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,1.0,2.0,3.0,5.0,10.0];
ThetaList=[1.0,10.0,100.0,1000.0];
results=[]
Reps=200; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,5000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta,g,GrRates,n,sfsMultiLocusData,Reps)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
results
beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 0.000000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 0.000000000000000 , 0.0 , -16422.3009309419 beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -16028.4737429655 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 0.000000000000000 , 0.0 , -26795.5988144158 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 0.000000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 0.000000000000000 , 0.0 , -16128.6146952371 beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -15662.5259045307 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 0.000000000000000 , 0.0 , -26254.8450548033 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 0.000000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 0.000000000000000 , 0.0 , -16153.5678644303 beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 0.000000000000000 , 0.0 , -15850.5218021710 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 0.000000000000000 , 0.0 , -26538.9522004612 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -15961.8514298296 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 1.00000000000000 , 0.0 , -27117.3003394868 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -15537.5894574872 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 1.00000000000000 , 0.0 , -26370.1676322233 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 1.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 1.00000000000000 , 0.0 , -15705.6247847785 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 1.00000000000000 , 0.0 , -26446.3182949772 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -16152.0144288948 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 2.00000000000000 , 0.0 , -26887.1974399624 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -15768.9988127428 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 2.00000000000000 , 0.0 , -26070.4776896200 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 2.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 2.00000000000000 , 0.0 , -15698.4926490336 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 2.00000000000000 , 0.0 , -26270.5375180653 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -16327.6260523775 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 3.00000000000000 , 0.0 , -26921.0641552768 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -15834.3765202690 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 3.00000000000000 , 0.0 , -26239.3022865899 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 3.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 3.00000000000000 , 0.0 , -15929.4729365962 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 3.00000000000000 , 0.0 , -26456.7767878908 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -17140.0109560034 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 5.00000000000000 , 0.0 , -26809.5379888034 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -16710.2692409932 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 5.00000000000000 , 0.0 , -26264.8777998098 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 5.00000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 5.00000000000000 , 0.0 , -17005.1235072424 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 5.00000000000000 , 0.0 , -26267.6976961831 beta, mu, g, lkl, logLkl = -1.00000000000000 , 1.00000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 10.0000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = -1.00000000000000 , 1000.00000000000 , 10.0000000000000 , 0.0 , -26533.4086149551 beta, mu, g, lkl, logLkl = 0.000000000000000 , 1.00000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 10.0000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 0.000000000000000 , 1000.00000000000 , 10.0000000000000 , 0.0 , -26197.9966739432 beta, mu, g, lkl, logLkl = 10.0000000000000 , 1.00000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 10.0000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 100.000000000000 , 10.0000000000000 , 0.0 , -infinity beta, mu, g, lkl, logLkl = 10.0000000000000 , 1000.00000000000 , 10.0000000000000 , 0.0 , -26294.5184967989 [[200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 0.000000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 0.000000000000000, -16422.3009309419], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 0.000000000000000, -16028.4737429655], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 0.000000000000000, -26795.5988144158], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 0.000000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 0.000000000000000, -16128.6146952371], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 0.000000000000000, -15662.5259045307], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 0.000000000000000, -26254.8450548033], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 0.000000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 0.000000000000000, -16153.5678644303], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 0.000000000000000, -15850.5218021710], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 0.000000000000000, -26538.9522004612], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 1.00000000000000, -15961.8514298296], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 1.00000000000000, -27117.3003394868], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 1.00000000000000, -15537.5894574872], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 1.00000000000000, -26370.1676322233], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 1.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 1.00000000000000, -15705.6247847785], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 1.00000000000000, -26446.3182949772], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 2.00000000000000, -16152.0144288948], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 2.00000000000000, -26887.1974399624], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 2.00000000000000, -15768.9988127428], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 2.00000000000000, -26070.4776896200], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 2.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 2.00000000000000, -15698.4926490336], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 2.00000000000000, -26270.5375180653], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 3.00000000000000, -16327.6260523775], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 3.00000000000000, -26921.0641552768], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 3.00000000000000, -15834.3765202690], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 3.00000000000000, -26239.3022865899], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 3.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 3.00000000000000, -15929.4729365962], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 3.00000000000000, -26456.7767878908], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 5.00000000000000, -17140.0109560034], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 5.00000000000000, -26809.5379888034], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 5.00000000000000, -16710.2692409932], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 5.00000000000000, -26264.8777998098], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 5.00000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 5.00000000000000, -17005.1235072424], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 5.00000000000000, -26267.6976961831], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1.00000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 10.0000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 100.000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, -1.00000000000000, 1000.00000000000, 10.0000000000000, -26533.4086149551], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1.00000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 10.0000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 1000.00000000000, 10.0000000000000, -26197.9966739432], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1.00000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 10.0000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 100.000000000000, 10.0000000000000, -infinity], [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 10.0000000000000, 1000.00000000000, 10.0000000000000, -26294.5184967989]]

# report the most likely values
print("[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
[Reps, n, NumLoci, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [200, 30, 60, 0.000000000000000, 2.00000000000000, 100.000000000000, 0.000000000000000, 100.000000000000, 1.00000000000000, -15537.5894574872]

Producing data to test the algorithms under bottleneck model


# dataset B1 - RECENT MILD BOTTLENECK - SMALL SAMPLE SIZE
set_random_seed(656005)
np.random.seed(int(10043))
n=3
N0=1
TrueBeta=0
TrueA=0.05
TrueGap=0.1
TrueB=TrueA+TrueGap
TrueEps=0.01
TrueTheta=50
NumLoci1=30
NumLoci2=100
TFS=[]
for i in range(NumLoci2):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci2)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

# inference
set_random_seed(12456546)
np.random.seed(int(12345))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci2)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10];
ThetaList=[TrueTheta];
results=[]
Reps=500; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci2,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])

                # report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][13]>results[MaxI][13]:
       MaxI=i
results[MaxI]
OUTPUT:
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.00100000000000000  , 1  , -3608.48793736
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.0100000000000000  , 1  , -1141.76086703
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 0.100000000000000  , 1  , -581.719746685
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 1.00000000000000  , 1  , -762.958933525
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.00500000000000000  , 0.105000000000000  , 10  , 1  , -997.965051971
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.00100000000000000  , 1  , -1502.2515838
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.0100000000000000  , 1  , -679.440596264
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 0.100000000000000  , 1  , -530.748263574
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 1.00000000000000  , 1  , -770.327024986
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.0500000000000000  , 0.150000000000000  , 10  , 1  , -967.593805413
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.00100000000000000  , 1  , -766.899089643
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.0100000000000000  , 1  , -760.754601725
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 0.100000000000000  , 1  , -765.649526704
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 1.00000000000000  , 1  , -781.649732233
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 0.500000000000000  , 0.600000000000000  , 10  , 1  , -769.549791934
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.00100000000000000  , 1  , -773.433328349
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.0100000000000000  , 1  , -773.235962739
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 0.100000000000000  , 1  , -773.176668532
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 1.00000000000000  , 1  , -767.145018239
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 1.00000000000000  , 1.10000000000000  , 10  , 1  , -774.944687062
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.00100000000000000  , 1  , -781.470446754
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.0100000000000000  , 1  , -774.247268458
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 0.100000000000000  , 1  , -754.533162385
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 1.00000000000000  , 1  , -780.409887946
beta, mu, a, b, eps, N0, logLkl =  0.000000000000000  , 50  , 5.00000000000000  , 5.10000000000000  , 10  , 1  , -774.066273148

The ML parameters are:
beta, mu, a, b, eps, N0, logLkl =  0.0, 50, 0.05, 0.15, 0.10, 1 , -530.748263574
true parameters are             =  0.0, 50, 0.05, 0.15, 0.01, 1     

so eps is over estimated.
# inference with less loci but more particles
set_random_seed(12456546)
np.random.seed(int(12345))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci1)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10];
ThetaList=[TrueTheta];
results=[]
Reps=1000; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci1,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])

beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -1163.51882255 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -365.696350645 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -173.175337965 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1.00000000000000 , 1 , -227.303776888 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 1 , -431.531782353 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -193.513111235 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -152.052859489 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -225.725399602 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -287.570401128 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -229.547239398 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -229.415438751 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -224.735694798 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -229.699559894 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -224.716661467 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -226.39264498 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -225.275990284 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -231.04895718 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -221.720299295 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -225.739136262 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -226.219278671 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -226.809838563 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -228.359573602 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -218.018749341 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -229.290564857

# dataset B2 - RECENT MILD BOTTLENECK - LARGER SAMPLE SIZE
set_random_seed(656005)
np.random.seed(int(10043))
n=20
N0=1
TrueBeta=0
TrueA=0.05
TrueGap=0.1
TrueB=TrueA+TrueGap
TrueEps=0.01
TrueTheta=50
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

#inference
set_random_seed(124546)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=500; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
47.8333333333 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -10891.5750864 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -4825.7295686 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -3846.83371819 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -4703.34086634 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -7525.41896187 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -5415.94989691 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -4043.44802175 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -3842.49433871 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -4268.33273154 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -3846.97192437 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -3865.06924946 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -3887.6771275 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -3814.16498995 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -3843.67431366 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -3871.95893156 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -3817.47356273 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -3879.2569377 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -3874.41545538 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -3859.62890493 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -3834.15887986 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -3895.44941551 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -3841.71901123 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -3827.64421938 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 50 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -3864.20955157

# dataset B3 - RECENT MILD BOTTLENECK -SMALL SAMPLE SIZE - HIGHER MUTATION RATE
set_random_seed(656005)
np.random.seed(int(10043))
n=10
N0=1
TrueBeta=0
TrueA=0.05
TrueGap=0.1
TrueB=TrueA+TrueGap
TrueEps=0.01
TrueTheta=150
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

#inference
set_random_seed(124546)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=500; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
100.633333333 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -4143.05110455 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -1565.313949 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -1521.12915228 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -1996.62453825 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -1931.59287118 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -1272.43522785 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -1537.94575198 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -1870.70083238 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -1525.7970105 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -1516.00736295 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -1532.82907382 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -1524.09678295 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -1508.19143274 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -1516.78782758 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -1504.09091572 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -1535.60390314 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -1510.11801896 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -1521.08752307 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -1521.71754983 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -1514.28750067 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -1531.01126246 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -1505.00522372 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 150 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -1533.90259185
# dataset B3bis - RECENT MILD BOTTLENECK -SMALL SAMPLE SIZE - EVEN HIGHER MUTATION RATE
set_random_seed(656005)
np.random.seed(int(10043))
n=10
N0=1
TrueBeta=0
TrueA=0.05
TrueGap=0.1
TrueB=TrueA+TrueGap
TrueEps=0.01
TrueTheta=500
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

#inference
set_random_seed(124546)
np.random.seed(int(123545))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=500; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
367.7 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -6259.71012625 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -2004.72334607 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -1901.35655085 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -2394.39501464 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -2854.14393912 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -1638.08135101 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -1900.43610638 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -2352.27307662 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -1889.43841676 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -1887.05300588 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -1886.20850772 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -1920.82108268 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -1916.17760265 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -1901.56034954 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -1903.36337373 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -1892.45424539 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -1903.58549086 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -1875.72009448 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -1904.70426324 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -1883.57480008 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -1908.90330908 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -1883.73005132 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 500 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -1918.55885588
With small mutation rates
# dataset B4 - RECENT MILD BOTTLENECK WITH LOW MUTATION RATE - SAMPLE SIZE 3, 1000 LOCI
set_random_seed(656005)
np.random.seed(int(10043))
n=3
N0=1 # this is fixed at unity as in Hudson's ms
TrueBeta=0
TrueA=0.05
TrueGap=0.1
TrueB=TrueA+TrueGap
TrueEps=0.01
TrueTheta=5
NumLoci=1000
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

# inference
set_random_seed(12456546)
np.random.seed(int(12345))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=200; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
1.284 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -8199.16658344159 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -4845.14168480036 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -2003.64279794536 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -3325.75934805400 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -4173.86472893806 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -5531.93157332962 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -3343.26549075243 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -1990.78719179484 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -3324.88864932211 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -4026.85032236171 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -3442.50066084301 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -3356.50634363227 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -3233.20706100601 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -3336.05667148710 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -3337.73846438619 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -3415.27071905189 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -3376.21020027055 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -3315.83668731960 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -3348.51498307126 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -3330.94713321818 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -3335.65005101939 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -3309.76392134207 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -3328.76412119589 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -3350.04170366840 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -3323.10652571638
We don't detect non-existant bottlenecks
# dataset B5 - NO BOTTLENECKS - SAMPLE SIZE 5, 100 LOCI
set_random_seed(656005)
np.random.seed(int(10043))
n=5
N0=1 # this is fixed at unity as in Hudson's ms
TrueBeta=0
TrueA=0.05       #dummy
TrueGap=0.1      #dummy
TrueB=TrueA+TrueGap
TrueEps=1.0    #no reduction in pop size
TrueTheta=5
NumLoci=100
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

#inference
set_random_seed(12456546)
np.random.seed(int(12345))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=1000; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
19.71 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -14893.7528941 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -5307.23388516 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -1017.54379745 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -377.626413346 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -371.38288873 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -23592.9313808 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -4618.22388074 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -833.920280682 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -366.136998204 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -368.620578748 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -5251.96210255 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -1152.24064874 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -469.945393142 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -378.677354715 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -374.894630666 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -2053.68035855 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -698.33914174 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -405.69827546 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -358.672249758 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -385.701546557 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -377.856904102 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -383.864605249 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -370.258771678 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -353.622678588 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -372.561354959

# dataset B6 - NO BOTTLENECKS - SAMPLE SIZE 10, 30 LOCI
set_random_seed(656)
np.random.seed(int(143))
n=10
N0=1 # this is fixed at unity as in Hudson's ms
TrueBeta=0
TrueA=0.05       #dummy
TrueGap=0.1      #dummy
TrueB=TrueA+TrueGap
TrueEps=1.0    #no reduction in pop size
TrueTheta=5
NumLoci=30
TFS=[]
for i in range(NumLoci):
 tfs = MakeS_Btl(TrueBeta,N0,TrueA,TrueB,TrueEps,TrueTheta,n)
 TFS.append(tfs)

# mean number of segregating sites
segSitesAcrossLoci=[np.sum(TFS[j][2]) for j in range(0,NumLoci)];
meanSegSites=mean(segSitesAcrossLoci);
print meanSegSites

#inference
set_random_seed(12456)
np.random.seed(int(15))
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
BetaList=[0.0];
AList=[0.005,0.05,0.5,1.0,5.0];
EpsList=[0.001,0.01, 0.1,1.0,10]
ThetaList=[TrueTheta];
results=[]
Reps=1000; # number of particles
for a in AList:
    b=a+TrueGap
    for eps in EpsList:
        btlRates = GetRatesForBottleneck(a,b,eps,N0,n,1000)
        for beta in BetaList:
            for theta in ThetaList:
                logLAcrossAllLoci = LklOfBetaThetaBottleneckRates(beta,theta,a,b,eps,N0,btlRates,n,sfsMultiLocusData,Reps)
                results.append([Reps,n,NumLoci,TrueBeta,beta,TrueTheta,theta,TrueA,a,TrueB,b,TrueEps,eps,logLAcrossAllLoci])
30.0333333333 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.0100000000000000 , 1 , -4020.72436877 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 0.100000000000000 , 1 , -1294.12377314 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 1.00000000000000 , 1 , -463.904646526 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.00500000000000000 , 0.105000000000000 , 10 , 1 , -435.304203016 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.00100000000000000 , 1 , -inf beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.0100000000000000 , 1 , -4286.62612305 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 0.100000000000000 , 1 , -1046.51737715 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 1.00000000000000 , 1 , -455.09475188 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.0500000000000000 , 0.150000000000000 , 10 , 1 , -423.612616743 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.00100000000000000 , 1 , -2541.73302458 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.0100000000000000 , 1 , -936.115534231 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 0.100000000000000 , 1 , -544.935290976 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 1.00000000000000 , 1 , -452.606864125 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 0.500000000000000 , 0.600000000000000 , 10 , 1 , -483.952790351 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.00100000000000000 , 1 , -1092.86592868 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.0100000000000000 , 1 , -659.687709815 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 0.100000000000000 , 1 , -475.99086912 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 1.00000000000000 , 1 , -438.91106984 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 1.00000000000000 , 1.10000000000000 , 10 , 1 , -465.69934625 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.00100000000000000 , 1 , -461.106491242 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.0100000000000000 , 1 , -458.676482539 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 0.100000000000000 , 1 , -453.921258108 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 1.00000000000000 , 1 , -461.540988399 beta, mu, a, b, eps, N0, logLkl = 0.000000000000000 , 5 , 5.00000000000000 , 5.10000000000000 , 10 , 1 , -430.72283051

Simulating Complex Demographical and Structural Scenarios for β\beta-Effective Population Size (β,Ne)(\beta, N_e)

The next cell is just testing if ms is already installed.

If not run the second cell below which will be downloading and compiling a slightly augmented version of Hudson's ms (just for extracting SFS from the the binary incidence matrices).

There is no need to run the second cell below is ms is already installed in /home/user/msdir/ms/.

1
1
%sh
ms 5 2 -T
ms 5 2 -T 48467 38575 3753 // (5:1.335576295853,(3:0.165384739637,(2:0.131984353065,(1:0.024002499878,4:0.024002499878):0.107981853187):0.033400386572):1.170191526413); // ((1:0.169807076454,3:0.169807076454):0.590738654137,(4:0.208807185292,(2:0.181732580066,5:0.181732580066):0.027074605227):0.551738560200);
%sh
rm -rf /home/user/msdir &&
cd /home/user &&
wget https://github.com/lamastex/lce/raw/master/msSimulations/msdir.tar.gz &&
tar zxvf msdir.tar.gz &&
cd msdir &&
gcc -O3 -o ms ms.c streec.c rand1.c -lm &&
gcc -o sample_statsSFS sample_statsSFS.c tajd.c -lm
export PATH="/home/user/msdir:$PATH"
ls -al
--2018-02-12 21:38:14-- https://github.com/lamastex/lce/raw/master/msSimulations/msdir.tar.gz Resolving github.com (github.com)... 192.30.253.112, 192.30.253.113 Connecting to github.com (github.com)|192.30.253.112|:443... connected. HTTP request sent, awaiting response... 302 Found Location: https://raw.githubusercontent.com/lamastex/lce/master/msSimulations/msdir.tar.gz [following] --2018-02-12 21:38:15-- https://raw.githubusercontent.com/lamastex/lce/master/msSimulations/msdir.tar.gz Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ... Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 402688 (393K) [application/octet-stream] Saving to: ‘msdir.tar.gz.6’ msdir.tar.gz.6 0%[ ] 0 --.-KB/s msdir.tar.gz.6 100%[===================>] 393.25K --.-KB/s in 0.004s 2018-02-12 21:38:15 (86.8 MB/s) - ‘msdir.tar.gz.6’ saved [402688/402688] msdir/ msdir/._rand1.c msdir/.DS_Store msdir/streec.c msdir/params msdir/rand1t.c msdir/R_example_1/ msdir/R_example_1/thetabayesfig.pdf msdir/R_example_1/._thetabayesfig.pdf msdir/R_example_1/thetabayes.R msdir/R_example_1/._thetabayes.R msdir/._microsat.c msdir/out msdir/._msdoc.pdf msdir/tst msdir/._R_example_3 msdir/._streec.c msdir/._R_example_1 msdir/sample_statsSFS.c msdir/._readms.output.R msdir/R_example_2/ msdir/R_example_2/._demog_infer_simple.R msdir/R_example_2/._demoginfersimplefig.pdf msdir/R_example_2/demoginfersimplefig.pdf msdir/R_example_2/demog_infer_simple.R msdir/._clms msdir/._stats.c msdir/seedms msdir/readms.output.R msdir/._sample_stats.c msdir/ms.c msdir/time2epoch.py msdir/._seedms msdir/ort msdir/rand2t.c msdir/._params msdir/microsat.c msdir/msdoc.pdf msdir/rand2.c msdir/tst2 msdir/._col1 msdir/._readme msdir/readme msdir/ms.h msdir/sample_stats.c msdir/tajd.c msdir/._rand2t.c msdir/._rand2.c msdir/._migmat msdir/._tajd.c msdir/._.DS_Store msdir/._ms.h msdir/dist3.c msdir/stats.c msdir/._dist3.c msdir/._ms.c msdir/R_example_3/ msdir/R_example_3/demog_infer2d.R msdir/R_example_3/demoginfer2dfig2.pdf msdir/R_example_3/._demoginfer2dfig2.pdf msdir/R_example_3/._demog_infer2d.R msdir/._rand1t.c msdir/migmat msdir/clms msdir/R_example_4/ msdir/R_example_4/._demoginfermcmcfig1.pdf msdir/R_example_4/demog_infer_mcmc.R msdir/R_example_4/demoginfermcmcfig2.pdf msdir/R_example_4/demoginfermcmcfig1.pdf msdir/R_example_4/._demoginfermcmcfig2.pdf msdir/R_example_4/._demog_infer_mcmc.R msdir/col1 msdir/._R_example_2 msdir/rand1.c msdir/ms.out msdir/ms msdir/sample_statsSFS msdir/._R_example_4 msdir/tst2Ts
ms.c: In function ‘main’:
ms.c:148:42: warning: ignoring return value of ‘scanf’, declared with attribute warn_unused_result [-Wunused-result]
if( ntbs > 0 ) for( k=0; k<ntbs; k++) scanf(" %s", tbsparamstrs[k] );
                                          ^
sample_statsSFS.c:14:1: warning: return type defaults to ‘int’ [-Wimplicit-int]
main(argc,argv)
 ^
sample_statsSFS.c: In function ‘main’:
sample_statsSFS.c:67:9: warning: implicit declaration of function ‘biggerlist’ [-Wimplicit-function-declaration]
biggerlist(nsam,maxsites, list) ;
         ^
total 428
drwxr-x--- 6 user user     62 Feb 12 21:38 .
drwx------ 6 user user     29 Feb 12 21:38 ..
-rw-r----- 1 user user 6148 Nov 8 22:39 .DS_Store -rw-r----- 1 user user 222 Nov 8 22:39 ._.DS_Store
-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_1
-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_2
-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_3
-rwxr-x--- 1 user user    222 Nov  5  2006 ._R_example_4
-rwxr----- 1 user user    222 Mar  9  2003 ._clms
-rwxr----- 1 user user    222 Jan  5  2004 ._col1
-rw-r----- 1 user user 222 Nov 5 2006 ._dist3.c -rw-r----- 1 user user 222 Nov 7 2006 ._microsat.c -rw-r----- 1 user user 222 Mar 9 2003 ._migmat -rw-r--r-- 1 user user 975 Nov 8 20:23 ._ms.c -rw-r----- 1 user user 514 Nov 8 19:15 ._ms.h -rw-r--r-- 1 user user 177 Oct 16 19:03 ._msdoc.pdf -rw-r----- 1 user user 222 Mar 8 2003 ._params -rw-r----- 1 user user 222 May 3 2007 ._rand1.c -rw-r----- 1 user user 222 May 3 2007 ._rand1t.c -rw-r----- 1 user user 222 May 3 2007 ._rand2.c -rw-r----- 1 user user 222 May 3 2007 ._rand2t.c -rw-r----- 1 user user 222 Mar 8 2003 ._readme -rw-r----- 1 user user 222 May 31 2007 ._readms.output.R -rw-r----- 1 user user 222 May 21 2007 ._sample_stats.c -rw-r----- 1 user user 222 Feb 4 2010 ._seedms -rw-r----- 1 user user 222 Mar 8 2003 ._stats.c -rw-r----- 1 user user 958 Mar 22 2017 ._streec.c -rw-r----- 1 user user 222 Mar 8 2003 ._tajd.c
drwxr-x--- 2 user user      6 Jan 11 07:17 R_example_1
drwxr-x--- 2 user user      6 Jan 11 07:17 R_example_2
drwxr-x--- 2 user user      6 Jan 11 07:17 R_example_3
drwxr-x--- 2 user user      8 Jan 11 07:17 R_example_4
-rwxr----- 1 user user     43 Mar  9  2003 clms
-rwxr----- 1 user user     23 Jan  5  2004 col1
-rw-r----- 1 user user 1531 Nov 5 2006 dist3.c -rw-r----- 1 user user 2937 Nov 7 2006 microsat.c -rw-r----- 1 user user 69 Mar 9 2003 migmat
-rwxr-xr-x 1 user user  53248 Feb 12 21:38 ms
-rw-r--r-- 1 user user 36485 Jan 12 12:46 ms.c -rw-r----- 1 user user 1226 Nov 8 19:15 ms.h -rw-r--r-- 1 user user 534 Jan 11 07:19 ms.out -rw-r--r-- 1 user user 226165 Oct 16 19:03 msdoc.pdf -rw-r--r-- 1 user user 110655 Jan 12 08:12 ort -rw-r--r-- 1 user user 1050 Jan 11 15:28 out -rw-r----- 1 user user 16 Mar 8 2003 params -rw-r----- 1 user user 1225 May 3 2007 rand1.c -rw-r----- 1 user user 821 May 3 2007 rand1t.c -rw-r----- 1 user user 816 May 3 2007 rand2.c -rw-r----- 1 user user 580 May 3 2007 rand2t.c -rw-r----- 1 user user 1386 Mar 8 2003 readme -rw-r----- 1 user user 3501 May 31 2007 readms.output.R -rw-r----- 1 user user 4675 May 21 2007 sample_stats.c
-rwxr-xr-x 1 user user  17976 Feb 12 21:38 sample_statsSFS
-rw-r----- 1 user user 5723 Jan 11 16:19 sample_statsSFS.c -rw-r----- 1 user user 17 Jan 12 13:23 seedms -rw-r----- 1 user user 1751 Mar 8 2003 stats.c -rw-r----- 1 user user 22032 Mar 22 2017 streec.c -rw-r----- 1 user user 1858 Mar 8 2003 tajd.c -rw-r--r-- 1 user user 1183 Jan 12 13:20 time2epoch.py -rw-r--r-- 1 user user 2419 Jan 12 13:23 tst -rw-r--r-- 1 user user 71 Jan 12 06:31 tst2 -rw-r--r-- 1 user user 49 Jan 12 08:06 tst2Ts
%sh
ms
Too few command line arguments usage: ms nsam howmany Options: -t theta (this option and/or the next must be used. Theta = 4*N0*u ) -s segsites ( fixed number of segregating sites) -T (Output gene tree.) -F minfreq Output only sites with freq of minor allele >= minfreq. -r rho nsites (rho here is 4Nc) -c f track_len (f = ratio of conversion rate to rec rate. tracklen is mean length.) if rho = 0., f = 4*N0*g, with g the gene conversion rate. -G alpha ( N(t) = N0*exp(-alpha*t) . alpha = -log(Np/Nr)/t -I npop n1 n2 ... [mig_rate] (all elements of mig matrix set to mig_rate/(npop-1) -m i j m_ij (i,j-th element of mig matrix set to m_ij.) -ma m_11 m_12 m_13 m_21 m_22 m_23 ...(Assign values to elements of migration matrix.) -n i size_i (popi has size set to size_i*N0 -g i alpha_i (If used must appear after -M option.) The following options modify parameters at the time 't' specified as the first argument: -eG t alpha (Modify growth rate of all pop's.) -eg t i alpha_i (Modify growth rate of pop i.) -eM t mig_rate (Modify the mig matrix so all elements are mig_rate/(npop-1) -em t i j m_ij (i,j-th element of mig matrix set to m_ij at time t ) -ema t npop m_11 m_12 m_13 m_21 m_22 m_23 ...(Assign values to elements of migration matrix.) -eN t size (Modify pop sizes. New sizes = size*N0 ) -en t i size_i (Modify pop size of pop i. New size of popi = size_i*N0 .) -es t i proportion (Split: pop i -> pop-i + pop-npop, npop increases by 1. proportion is probability that each lineage stays in pop-i. (p, 1-p are admixt. proport. Size of pop npop is set to N0 and alpha = 0.0 , size and alpha of pop i are unchanged. -ej t i j ( Join lineages in pop i and pop j into pop j size, alpha and M are unchanged. -eA t popi num ( num Ancient samples from popi at time t -f filename ( Read command line arguments from file filename.) -p n ( Specifies the precision of the position output. n is the number of digits after the decimal.) -a (Print out allele ages.) See msdoc.pdf for explanation of these parameters. ms

To get SFS from ms output do as follows

n=15; NumLoci=100; TrueTheta=10; # make sure these agree with `ms n NumLoci -t TrueTheta` in the `%sh mc .. ` cell below
N0=1.0; TrueG=0.0; TrueBeta=0;
%sh ms 15 100 -t 10.0 | sample_statsSFS > output
%sh
pwd
head -n 2 output
/home/user/msdir 10 4 6 1 0 8 0 0 2 0 0 0 0 0 23 1 0 1 2 0 0 0 0 0 0 0 0 2
msSfsDataRaw = np.genfromtxt('/home/user/msdir/output', delimiter=' ')
msSfsMultiLocusData = [np.insert( np.insert(a, -1, 0., axis=0), 0, 0., axis=0) for a in msSfsDataRaw]
#msSfsMultiLocusData
# the Watterson estimator of theta under panmictic Kingman prior
print (mean([np.sum(msSfsMultiLocusData[j]) for j in range(0,NumLoci)])) / (sum(np.array([1.0/i for i in range(1,n,1)])))
9.86602647531
TrueBeta=0;
TFS=[]
for i in range(NumLoci):
 tfs = MakeS(TrueBeta,N0,TrueG,TrueTheta,n)
 TFS.append(tfs)
sfsMultiLocusData = [TFS[j][2] for j in range(0,NumLoci)]
#print sfsMultiLocusData
print (mean([np.sum(msSfsMultiLocusData[j]) for j in range(0,NumLoci)])) / (sum(np.array([1.0/i for i in range(1,n,1)])))
10.1120620483
# dataset 1G (REPEATED via ms for testing purposes) - KINGMAN'S COALESCENT WITH NO GROWTH

# Likelihood based on the full list of SFS
set_random_seed(9811545)
np.random.seed(int(918675))

BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0];
ThetaList=[1,10,100];
results=[]
Reps=20; # number of particles
for g in GrowthList:
    #GrRates = GetRatesForGrowth(g,n,1000)
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!!
            #print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta)
            print([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
# from multiple replicate datasets
#[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
# [20, 15, 100,      0,        0.0,   10,        0.0,     10,       0.0,  -5093.2238698216834]
# [20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5147.4560231561763]
[20, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 1, 0.000000000000000, -8575.5514680327287] [20, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 10, 0.000000000000000, -5217.8312013003724] [20, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 100, 0.000000000000000, -7626.3498637938847] [20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 1, 0.000000000000000, -8479.243014885189] [20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5147.4560231561763] [20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 100, 0.000000000000000, -7780.1337279236359] [20, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 1, 0.000000000000000, -8562.8114291459169] [20, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 10, 0.000000000000000, -5268.6403181905471] [20, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 100, 0.000000000000000, -8040.3624073076298] [Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [20, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -5147.4560231561763]
# dataset 1G (REPEATED via ms for testing purposes) - KINGMAN'S COALESCENT WITH NO GROWTH

# Likelihood based on the full list of SFS
set_random_seed(9871545)
np.random.seed(int(918675))

BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0];
ThetaList=[1,10,100];
results=[]
Reps=100; # number of particles
for g in GrowthList:
    #GrRates = GetRatesForGrowth(g,n,1000)
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!!
            #print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta)
            print([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]
# from multiple replicate datasets
#[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]
# [20, 15, 100,      0,        0.0,   10,        0.0,     10,       0.0,  -5093.2238698216834]
# [100, 15, 100,     0,        0.0,   10,        0.0,     10,       0.0,  -4719.0559858475472]
[100, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 1, 0.000000000000000, -8084.2905567486741] [100, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 10, 0.000000000000000, -4803.2402300140157] [100, 15, 100, 0, 0.000000000000000, 10, -1.00000000000000, 100, 0.000000000000000, -7226.7640817544534] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 1, 0.000000000000000, -8034.6893234419967] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -4719.0559858475472] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -4719.0559858475472] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 100, 0.000000000000000, -7344.1201747338246] [100, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 1, 0.000000000000000, -8187.6802601310701] [100, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 10, 0.000000000000000, -4881.8177457366373] [100, 15, 100, 0, 0.000000000000000, 10, 10.0000000000000, 100, 0.000000000000000, -7583.3041235116634] [Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci] [100, 15, 100, 0, 0.000000000000000, 10, 0.000000000000000, 10, 0.000000000000000, -4719.0559858475472]
Reps=800; beta=0.0; theta=10.0; growth=0.0;
GrRates=[0.,0.];
for i in range(2,n+1,1):
    GrRates.append(1.0*binomial(i,2))
print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]")
for i in range(5):
    logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps)
    print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
'''
[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]
[10, 15, 100, 0.000000000000000, 10.0000000000000, -5458.7347191116787]
[10, 15, 100, 0.000000000000000, 10.0000000000000, -5254.6231241170144]
[10, 15, 100, 0.000000000000000, 10.0000000000000, -5297.870156684322]
[10, 15, 100, 0.000000000000000, 10.0000000000000, -5396.4229009903611]
[10, 15, 100, 0.000000000000000, 10.0000000000000, -5294.8534748499615]

[100, 15, 100, 0.000000000000000, 10.0000000000000, -4637.1829885643956]
[100, 15, 100, 0.000000000000000, 10.0000000000000, -4575.514309145261]
[100, 15, 100, 0.000000000000000, 10.0000000000000, -4612.6721939365625]
[100, 15, 100, 0.000000000000000, 10.0000000000000, -4643.4324787908445]
[100, 15, 100, 0.000000000000000, 10.0000000000000, -4647.9556338215662]

[200, 15, 100, 0.000000000000000, 10.0000000000000, -4542.2875568237114]
[200, 15, 100, 0.000000000000000, 10.0000000000000, -4532.9762385300965]
[200, 15, 100, 0.000000000000000, 10.0000000000000, -4538.9995541760582]
[200, 15, 100, 0.000000000000000, 10.0000000000000, -4518.7981532977683]
[200, 15, 100, 0.000000000000000, 10.0000000000000, -4464.6882092921878]

[400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955]
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115]

[800, 15, 100, 0.000000000000000, 10.0000000000000, -4253.587709948677]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4285.4570525672298]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4229.0499889288321]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4306.3893655985867]
[800, 15, 100, 0.000000000000000, 10.0000000000000, -4315.2945672085107]
'''
[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4253.587709948677] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4285.4570525672298] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4229.0499889288321] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4306.3893655985867] [800, 15, 100, 0.000000000000000, 10.0000000000000, -4315.2945672085107] '\n[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5458.7347191116787]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5254.6231241170144]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5297.870156684322]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5396.4229009903611]\n[10, 15, 100, 0.000000000000000, 10.0000000000000, -5294.8534748499615]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4637.1829885643956]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4575.514309145261]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4612.6721939365625]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4643.4324787908445]\n[100, 15, 100, 0.000000000000000, 10.0000000000000, -4647.9556338215662]\n\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4542.2875568237114]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4532.9762385300965]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4538.9995541760582]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4518.7981532977683]\n[200, 15, 100, 0.000000000000000, 10.0000000000000, -4464.6882092921878]\n\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955]\n[400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115]\n'

Reps=400
for i in range(5):
    logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps)
    print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
[400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4344.6063514647376] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4342.8307183177949] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4354.7931999467328] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4332.2102000277955] [400, 15, 100, 0.000000000000000, 10.0000000000000, -4352.7192419989115]

# dataset 1G (REPEATED via ms for testing purposes) - KINGMAN'S COALESCENT WITH NO GROWTH

# Likelihood based on the full list of SFS
set_random_seed(987545)
np.random.seed(int(18675))

BetaList=[-1.0,0.0,10.0];
GrowthList=[0.0,1.0,10.0];
ThetaList=[1,10,100];
results=[]
Reps=20; # number of particles
for g in GrowthList:
    GrRates = GetRatesForGrowth(g,n,1000)
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!!
            print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta)
            results.append([Reps,n,NumLoci,TrueBeta,TrueG,TrueTheta,beta,theta,g,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, TrueBeta, TrueG, TrueTheta, betaHat, thetaHat, gHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][9]>results[MaxI][9]:
       MaxI=i
results[MaxI]

Stochastic Global Optimisation for Point Estimation

https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.differential_evolution.html

%time
# using same SFS data as above with standard coalescent
from scipy.optimize import differential_evolution
Reps=100; # number of particles with 100 is taking too long in this naive implementation, but REPS=20 has too much vriance in likelihood estimate!
parametersAndLogLkl=[]
for g in [0]:
    #GrRates = GetRatesForGrowth(g,n,1000) # slow - when g=0 we can do it analytically as below
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    #logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps)
    def negLkl(BT):
        f = -LklOfBetaThetaGrowthRates(RR(BT[0]),RR(BT[1]/4.0),g,GrRates,n,msSfsMultiLocusData,Reps);
        parametersAndLogLkl.append((BT[0],BT[1], -f)) # track the parameters for each likihood evaluation
        #print str(BT[0])+" , "+str(BT[1])+" : "+str(f)
        return f #negLogLkl_SplitPairCounts2(spc,B)

    bounds = [(-0.99,20.0),(1.0,100.0)]

    def mycallback(xk, convergence):
        print "mycallback "+str(xk)+" , "+ str(convergence)
        return convergence>1.0

    result = differential_evolution(negLkl, bounds, disp=True, popsize=15, callback=mycallback, recombination=0.7, maxiter=100)
    result.x, result.fun
#### see results in markdown for various runs of the same dataset
differential_evolution step 1: f(x)= 4840.34 mycallback [-0.07549672 8.09048459] , 0.0799216743708 differential_evolution step 2: f(x)= 4717.08 mycallback [ 1.27948965 11.49985163] , 0.0890399767062 differential_evolution step 3: f(x)= 4717.08 mycallback [ 1.27948965 11.49985163] , 0.117261285127 differential_evolution step 4: f(x)= 4717.08
REPS=100
differential_evolution step 1: f(x)= 4840.34
mycallback [-0.07549672  8.09048459] , 0.0799216743708
differential_evolution step 2: f(x)= 4717.08
mycallback [  1.27948965  11.49985163] , 0.0890399767062
differential_evolution step 3: f(x)= 4717.08
mycallback [  1.27948965  11.49985163] , 0.117261285127
differential_evolution step 4: f(x)= 4717.08

With just 10 particles (REPS=10) there is too much uncertainty in likelihood estimates for stable convergence of the stochastic optimisation algorithm.

REPS=10
differential_evolution step 1: f(x)= 5501.14
mycallback [  6.67340526  14.94882129] , 0.0856814618557
differential_evolution step 2: f(x)= 5454.21
mycallback [ 19.73690787  11.70403212] , 0.102796928814
differential_evolution step 3: f(x)= 5386.65
mycallback [  3.73906694  15.47908072] , 0.198542855185
differential_evolution step 4: f(x)= 5386.65
mycallback [  3.73906694  15.47908072] , 0.260265815629
differential_evolution step 5: f(x)= 5375.97differential_evolution step 1: f(x)= 5501.14
mycallback [  6.67340526  14.94882129] , 0.0856814618557
differential_evolution step 2: f(x)= 5454.21
mycallback [ 19.73690787  11.70403212] , 0.102796928814
differential_evolution step 3: f(x)= 5386.65
mycallback [  3.73906694  15.47908072] , 0.198542855185
differential_evolution step 4: f(x)= 5386.65
mycallback [  3.73906694  15.47908072] , 0.260265815629
differential_evolution step 5: f(x)= 5375.97
mycallback [  5.91624121  14.41968427] , 1.01879779978
(array([  5.91624121,  14.41968427]), 5375.9717345313738)

CPU time: 5724.49 s, Wall time: 5794.37 s
%time
from scipy.optimize import rosen, differential_evolution
Reps=100; # number of particles with 100 is taking too long in this naive implementation, but REPS=20 has too much vriance in likelihood estimate!
parametersAndLogLkl=[]
for g in [0]:
    #GrRates = GetRatesForGrowth(g,n,1000) # slow - when g=0 we can do it analytically as below
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    #logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps)
    def negLkl(BT):
        f = -LklOfBetaThetaGrowthRates(RR(BT[0]),RR(BT[1]/4.0),g,GrRates,n,msSfsMultiLocusData,Reps);
        parametersAndLogLkl.append((BT[0],BT[1], -f)) # track the parameters for each likihood evaluation
        #print str(BT[0])+" , "+str(BT[1])+" : "+str(f)
        return f #negLogLkl_SplitPairCounts2(spc,B)

    bounds = [(-0.99,20.0),(1.0,150.0)]

    def mycallback(xk, convergence):
        print "mycallback "+str(xk)+" , "+ str(convergence)
        return convergence>1.0

    result = differential_evolution(negLkl, bounds, disp=True, popsize=25, callback=mycallback, recombination=0.7, maxiter=100)
    # Reps=10 : popsize=15, recombination=0.7, maxiter=10 (array([  5.89973178,  61.12517964]), 24876.259063730031)
    # Reps =10; beta, mu, g, lkl, logLkl =  8.27364821134604  , 17.2526514877720  , 0.000000000000000  , 0.0  , -24674.2850081
    #bounds = [(0,2), (0, 2)]
    #result = differential_evolution(rosen, bounds)
    result.x, result.fun
#### see results in markdown for various runs of the same dataset
from scipy.optimize import rosen, differential_evolution
# this is very slow even with 10 particles .... :( but gets the ball-park answer betaHat,thetaHat, -logLkl = [ -0.02068417,  13.1918302 ]), 5217.4830580740918
Reps=2; # number of particles with 100 is taking too long in this naive implementation
for g in GrowthList:
    #GrRates = GetRatesForGrowth(g,n,1000) # slow - when g=0 we can do it analytically as below
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    #logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps)
    def negLkl(BT):
        f = -LklOfBetaThetaGrowthRates(RR(BT[0]),RR(BT[1]/4.0),g,GrRates,n,msSfsMultiLocusData,Reps)
        #print BT[0],BT[1],f
        return f #negLogLkl_SplitPairCounts2(spc,B)

    bounds = [(-0.99,10.0),(1.0,50.0)]
    result = differential_evolution(negLkl, bounds, disp=True, popsize=10,maxiter=2)
    #bounds = [(0,2), (0, 2)]
    #result = differential_evolution(rosen, bounds)
    result.x, result.fun
beta, mu, g, lkl, logLkl = 0.192638876372166 , 3.10305863202379 , 0.000000000000000 , 0.0 , -5959.36028135 beta, mu, g, lkl, logLkl = 2.67132512883625 , 12.4986014807133 , 0.000000000000000 , 0.0 , -7372.41751058 beta, mu, g, lkl, logLkl = 7.49799987485964 , 1.97524202923657 , 0.000000000000000 , 0.0 , -6026.14589091 beta, mu, g, lkl, logLkl = -0.0285043371578597 , 7.46666808400616 , 0.000000000000000 , 0.0 , -6685.24590411 beta, mu, g, lkl, logLkl = 3.34795982225950 , 11.6497056655876 , 0.000000000000000 , 0.0 , -7278.767204 beta, mu, g, lkl, logLkl = 2.23636457002183 , 8.67266627919945 , 0.000000000000000 , 0.0 , -6876.02919136 beta, mu, g, lkl, logLkl = 3.78436729022016 , 6.09873216236385 , 0.000000000000000 , 0.0 , -6412.68424573 beta, mu, g, lkl, logLkl = 8.50757204007902 , 9.92991203928796 , 0.000000000000000 , 0.0 , -7113.78218942 beta, mu, g, lkl, logLkl = 6.06243292729823 , 0.548945366636503 , 0.000000000000000 , 0.0 , -7565.39248692 beta, mu, g, lkl, logLkl = 4.68965901099694 , 3.66388200572125 , 0.000000000000000 , 0.0 , -6246.55179671 beta, mu, g, lkl, logLkl = 4.35688866699788 , 10.6428001005425 , 0.000000000000000 , 0.0 , -7110.14703741 beta, mu, g, lkl, logLkl = 9.16936331214881 , 6.72651133507723 , 0.000000000000000 , 0.0 , -6544.03642779 beta, mu, g, lkl, logLkl = 9.85781452108139 , 1.30484065019712 , 0.000000000000000 , 0.0 , -6328.42461171 beta, mu, g, lkl, logLkl = 0.816395524051414 , 2.36332553286671 , 0.000000000000000 , 0.0 , -6016.30390969 beta, mu, g, lkl, logLkl = 7.21413656472797 , 9.31191598328838 , 0.000000000000000 , 0.0 , -7082.62897066 beta, mu, g, lkl, logLkl = 6.33536935538389 , 10.9597156910161 , 0.000000000000000 , 0.0 , -7149.73061066 beta, mu, g, lkl, logLkl = 8.27055336661729 , 4.97902583659657 , 0.000000000000000 , 0.0 , -6311.30477347 beta, mu, g, lkl, logLkl = 1.35918783360545 , 7.96286047667110 , 0.000000000000000 , 0.0 , -6628.6018027 beta, mu, g, lkl, logLkl = -0.466719496508359 , 5.57985090985656 , 0.000000000000000 , 0.0 , -6575.92260353 beta, mu, g, lkl, logLkl = 5.18711156639093 , 4.37825362756296 , 0.000000000000000 , 0.0 , -6322.40115164 beta, mu, g, lkl, logLkl = 1.67494249329244 , 9.67447060439608 , 0.000000000000000 , 0.0 , -6985.4346397 beta, mu, g, lkl, logLkl = 7.69490521727455 , 0.689216967151480 , 0.000000000000000 , 0.0 , -7275.37049679 beta, mu, g, lkl, logLkl = 3.49039227568789 , 10.3317546849555 , 0.000000000000000 , 0.0 , -7087.19841019 beta, mu, g, lkl, logLkl = 6.92038038979015 , 5.26746915289674 , 0.000000000000000 , 0.0 , -6313.42500847 beta, mu, g, lkl, logLkl = 6.95244948857194 , 3.45260834656504 , 0.000000000000000 , 0.0 , -6130.01887935 beta, mu, g, lkl, logLkl = -0.503205832268426 , 0.397528088764742 , 0.000000000000000 , 0.0 , -8442.41312608 beta, mu, g, lkl, logLkl = 3.78436729022016 , 1.35255462808006 , 0.000000000000000 , 0.0 , -6428.62478199 beta, mu, g, lkl, logLkl = 4.67615534518670 , 3.67883562201361 , 0.000000000000000 , 0.0 , -6209.3237204 beta, mu, g, lkl, logLkl = 6.06243292729823 , 8.23010766628102 , 0.000000000000000 , 0.0 , -6811.31764995 beta, mu, g, lkl, logLkl = 2.73429535133285 , 1.56861914031829 , 0.000000000000000 , 0.0 , -6179.49330062 beta, mu, g, lkl, logLkl = 2.96991796963890 , 3.64417865440990 , 0.000000000000000 , 0.0 , -6251.37216326 beta, mu, g, lkl, logLkl = 3.96307698196270 , 6.72651133507723 , 0.000000000000000 , 0.0 , -6590.47598199 beta, mu, g, lkl, logLkl = 5.16361988756579 , 10.8458915475144 , 0.000000000000000 , 0.0 , -7221.92190556 beta, mu, g, lkl, logLkl = 1.40874977574457 , 2.84325557950820 , 0.000000000000000 , 0.0 , -5991.35443199 beta, mu, g, lkl, logLkl = 0.00365597770044435 , 2.30213555712210 , 0.000000000000000 , 0.0 , -6134.61393871 beta, mu, g, lkl, logLkl = 8.71089382474266 , 1.40613546192693 , 0.000000000000000 , 0.0 , -6361.96423749 beta, mu, g, lkl, logLkl = 7.18125606152488 , 4.97902583659657 , 0.000000000000000 , 0.0 , -6309.35420598 beta, mu, g, lkl, logLkl = 1.57668630043092 , 7.96286047667110 , 0.000000000000000 , 0.0 , -6765.12718023 beta, mu, g, lkl, logLkl = -0.744870852752415 , 9.24946143904115 , 0.000000000000000 , 0.0 , -7133.00470978 beta, mu, g, lkl, logLkl = 5.18711156639093 , 4.55582647046258 , 0.000000000000000 , 0.0 , -6297.25132715 differential_evolution step 1: f(x)= 5959.36 beta, mu, g, lkl, logLkl = 0.192638876372166 , 5.33506215577862 , 0.000000000000000 , 0.0 , -6233.43365021 beta, mu, g, lkl, logLkl = 1.66364081833354 , 5.11887655987214 , 0.000000000000000 , 0.0 , -6262.96622871 beta, mu, g, lkl, logLkl = 0.338538343347064 , 4.38826218742826 , 0.000000000000000 , 0.0 , -6367.89190528 beta, mu, g, lkl, logLkl = 3.14236397409478 , 3.35484287574148 , 0.000000000000000 , 0.0 , -6104.87506939 beta, mu, g, lkl, logLkl = 5.86812039065937 , 5.84266857414679 , 0.000000000000000 , 0.0 , -6520.56924811 beta, mu, g, lkl, logLkl = 2.23636457002183 , 0.827913436542197 , 0.000000000000000 , 0.0 , -6952.95971211 beta, mu, g, lkl, logLkl = 7.25750359508364 , 3.99293525185519 , 0.000000000000000 , 0.0 , -6087.1047052 beta, mu, g, lkl, logLkl = 3.86074216660030 , 5.37814437768196 , 0.000000000000000 , 0.0 , -6231.30999424 beta, mu, g, lkl, logLkl = 3.81423543974023 , 3.81494975942578 , 0.000000000000000 , 0.0 , -6042.67108275 beta, mu, g, lkl, logLkl = 3.69353766263250 , 2.46897426036644 , 0.000000000000000 , 0.0 , -6142.37195855 beta, mu, g, lkl, logLkl = 1.11747214582904 , 4.39525471785516 , 0.000000000000000 , 0.0 , -6180.68009054 beta, mu, g, lkl, logLkl = -0.145170418590930 , 4.01785603039970 , 0.000000000000000 , 0.0 , -6256.13525601 beta, mu, g, lkl, logLkl = 9.85781452108139 , 3.04252144682377 , 0.000000000000000 , 0.0 , -6092.50468237 beta, mu, g, lkl, logLkl = 1.45813066782271 , 6.55180558519940 , 0.000000000000000 , 0.0 , -6667.19188988 beta, mu, g, lkl, logLkl = 3.69548178858569 , 2.58566058385317 , 0.000000000000000 , 0.0 , -6176.88539186 beta, mu, g, lkl, logLkl = 2.36042425504495 , 2.71042836372860 , 0.000000000000000 , 0.0 , -6025.08222264 beta, mu, g, lkl, logLkl = -0.722998687542751 , 4.19536282629316 , 0.000000000000000 , 0.0 , -6278.88214632 beta, mu, g, lkl, logLkl = 1.35918783360545 , 4.36786205432543 , 0.000000000000000 , 0.0 , -6252.91619478 beta, mu, g, lkl, logLkl = 3.99384188106299 , 1.62152859537633 , 0.000000000000000 , 0.0 , -6417.61327055 beta, mu, g, lkl, logLkl = 4.56751725155766 , 4.55582647046258 , 0.000000000000000 , 0.0 , -6119.39907965 differential_evolution step 2: f(x)= 5959.36 beta, mu, g, lkl, logLkl = 0.192638876372166 , 3.10305863202379 , 0.000000000000000 , 0.0 , -6131.56057775 beta, mu, g, lkl, logLkl = 0.192638886372166 , 3.10305863202379 , 0.000000000000000 , 0.0 , -6078.25877917 beta, mu, g, lkl, logLkl = 0.192638876372166 , 3.10305863452379 , 0.000000000000000 , 0.0 , -6284.41688891 beta, mu, g, lkl, logLkl = 10.0000000000000 , 0.250000000000000 , 0.000000000000000 , 0.0 , -9137.76313061 beta, mu, g, lkl, logLkl = 10.0000000100000 , 0.250000000000000 , 0.000000000000000 , 0.0 , -9149.80345139 beta, mu, g, lkl, logLkl = 10.0000000000000 , 0.250000002500000 , 0.000000000000000 , 0.0 , -9309.20554021 beta, mu, g, lkl, logLkl = 2.43072387055414 , 2.45197750720723 , 0.000000000000000 , 0.0 , -6126.73018708 beta, mu, g, lkl, logLkl = 2.43072388055414 , 2.45197750720723 , 0.000000000000000 , 0.0 , -5983.82942447 beta, mu, g, lkl, logLkl = 2.43072387055414 , 2.45197750970723 , 0.000000000000000 , 0.0 , -6263.1118694 beta, mu, g, lkl, logLkl = 0.613854406619796 , 2.98052285615436 , 0.000000000000000 , 0.0 , -6088.00262364 beta, mu, g, lkl, logLkl = 0.613854416619796 , 2.98052285615436 , 0.000000000000000 , 0.0 , -6036.85110754 beta, mu, g, lkl, logLkl = 0.613854406619796 , 2.98052285865436 , 0.000000000000000 , 0.0 , -5986.87893203 beta, mu, g, lkl, logLkl = 0.354165950124406 , 3.05606880405018 , 0.000000000000000 , 0.0 , -6026.5351121 beta, mu, g, lkl, logLkl = 0.354165960124406 , 3.05606880405018 , 0.000000000000000 , 0.0 , -6062.35359389 beta, mu, g, lkl, logLkl = 0.354165950124406 , 3.05606880655018 , 0.000000000000000 , 0.0 , -6172.25697922 beta, mu, g, lkl, logLkl = 0.233351673085460 , 3.09121487562159 , 0.000000000000000 , 0.0 , -6060.43865358 beta, mu, g, lkl, logLkl = 0.233351683085460 , 3.09121487562159 , 0.000000000000000 , 0.0 , -6051.7120833 beta, mu, g, lkl, logLkl = 0.233351673085460 , 3.09121487812159 , 0.000000000000000 , 0.0 , -6019.3662997 beta, mu, g, lkl, logLkl = 0.207377943213967 , 3.09877089127978 , 0.000000000000000 , 0.0 , -6109.53214353 beta, mu, g, lkl, logLkl = 0.207377953213967 , 3.09877089127978 , 0.000000000000000 , 0.0 , -6125.10610218 beta, mu, g, lkl, logLkl = 0.207377943213967 , 3.09877089377978 , 0.000000000000000 , 0.0 , -6155.02525146 beta, mu, g, lkl, logLkl = 0.197165171051696 , 3.10174188800430 , 0.000000000000000 , 0.0 , -6245.20614406 beta, mu, g, lkl, logLkl = 0.197165181051696 , 3.10174188800430 , 0.000000000000000 , 0.0 , -6104.44659838 beta, mu, g, lkl, logLkl = 0.197165171051696 , 3.10174189050430 , 0.000000000000000 , 0.0 , -6082.23437022 beta, mu, g, lkl, logLkl = 0.194314419637385 , 3.10257119986789 , 0.000000000000000 , 0.0 , -6094.10604157 beta, mu, g, lkl, logLkl = 0.194314429637385 , 3.10257119986789 , 0.000000000000000 , 0.0 , -6019.46527039 beta, mu, g, lkl, logLkl = 0.194314419637385 , 3.10257120236789 , 0.000000000000000 , 0.0 , -6125.24844111 beta, mu, g, lkl, logLkl = 0.193080660132688 , 3.10293011274572 , 0.000000000000000 , 0.0 , -6272.78116254 beta, mu, g, lkl, logLkl = 0.193080670132688 , 3.10293011274572 , 0.000000000000000 , 0.0 , -6126.20077927 beta, mu, g, lkl, logLkl = 0.193080660132688 , 3.10293011524572 , 0.000000000000000 , 0.0 , -6054.71655418 beta, mu, g, lkl, logLkl = 0.192822702475048 , 3.10300515518706 , 0.000000000000000 , 0.0 , -6106.41518998 beta, mu, g, lkl, logLkl = 0.192822712475048 , 3.10300515518706 , 0.000000000000000 , 0.0 , -5987.8044366 beta, mu, g, lkl, logLkl = 0.192822702475048 , 3.10300515768706 , 0.000000000000000 , 0.0 , -6150.26753875 beta, mu, g, lkl, logLkl = 0.192682158303393 , 3.10304604088068 , 0.000000000000000 , 0.0 , -6182.05215543 beta, mu, g, lkl, logLkl = 0.192682168303393 , 3.10304604088068 , 0.000000000000000 , 0.0 , -6081.14160241 beta, mu, g, lkl, logLkl = 0.192682158303393 , 3.10304604338068 , 0.000000000000000 , 0.0 , -6057.26764101 beta, mu, g, lkl, logLkl = 0.192654742319382 , 3.10305401646237 , 0.000000000000000 , 0.0 , -6259.96931151 beta, mu, g, lkl, logLkl = 0.192654752319382 , 3.10305401646237 , 0.000000000000000 , 0.0 , -6226.55881597 beta, mu, g, lkl, logLkl = 0.192654742319382 , 3.10305401896237 , 0.000000000000000 , 0.0 , -6240.92287717 beta, mu, g, lkl, logLkl = 0.192644034400764 , 3.10305713150211 , 0.000000000000000 , 0.0 , -6161.27725395 beta, mu, g, lkl, logLkl = 0.192644044400764 , 3.10305713150211 , 0.000000000000000 , 0.0 , -6178.52908633 beta, mu, g, lkl, logLkl = 0.192644034400764 , 3.10305713400211 , 0.000000000000000 , 0.0 , -6189.52660829 beta, mu, g, lkl, logLkl = 0.192640537825439 , 3.10305814869055 , 0.000000000000000 , 0.0 , -6181.86672518 beta, mu, g, lkl, logLkl = 0.192640547825439 , 3.10305814869055 , 0.000000000000000 , 0.0 , -6021.01092336 beta, mu, g, lkl, logLkl = 0.192640537825439 , 3.10305815119055 , 0.000000000000000 , 0.0 , -6030.10581767 beta, mu, g, lkl, logLkl = 0.192639447831879 , 3.10305846578049 , 0.000000000000000 , 0.0 , -6017.14996387 beta, mu, g, lkl, logLkl = 0.192639457831879 , 3.10305846578049 , 0.000000000000000 , 0.0 , -6139.22015951 beta, mu, g, lkl, logLkl = 0.192639447831879 , 3.10305846828049 , 0.000000000000000 , 0.0 , -6019.72745884 beta, mu, g, lkl, logLkl = 0.192639050147600 , 3.10305846295499 , 0.000000000000000 , 0.0 , -6048.12201106 beta, mu, g, lkl, logLkl = 0.192639060147600 , 3.10305846295499 , 0.000000000000000 , 0.0 , -6290.14258722 beta, mu, g, lkl, logLkl = 0.192639050147600 , 3.10305846545499 , 0.000000000000000 , 0.0 , -6232.48324842 beta, mu, g, lkl, logLkl = 0.192639388774753 , 3.10305846536090 , 0.000000000000000 , 0.0 , -6131.48137252 beta, mu, g, lkl, logLkl = 0.192639398774753 , 3.10305846536090 , 0.000000000000000 , 0.0 , -6088.72194232 beta, mu, g, lkl, logLkl = 0.192639388774753 , 3.10305846786090 , 0.000000000000000 , 0.0 , -6106.59784861 beta, mu, g, lkl, logLkl = 0.192639430401962 , 3.10305846565666 , 0.000000000000000 , 0.0 , -5980.92387623 beta, mu, g, lkl, logLkl = 0.192639440401962 , 3.10305846565666 , 0.000000000000000 , 0.0 , -6381.52342578 beta, mu, g, lkl, logLkl = 0.192639430401962 , 3.10305846815666 , 0.000000000000000 , 0.0 , -6252.54723446 beta, mu, g, lkl, logLkl = 0.192639418026174 , 3.10305846556873 , 0.000000000000000 , 0.0 , -6133.75725714 beta, mu, g, lkl, logLkl = 0.192639428026174 , 3.10305846556873 , 0.000000000000000 , 0.0 , -6049.16364513 beta, mu, g, lkl, logLkl = 0.192639418026174 , 3.10305846806873 , 0.000000000000000 , 0.0 , -5971.18672324 beta, mu, g, lkl, logLkl = 0.192639427615362 , 3.10305846563686 , 0.000000000000000 , 0.0 , -6128.07517595 beta, mu, g, lkl, logLkl = 0.192639437615362 , 3.10305846563686 , 0.000000000000000 , 0.0 , -5994.94295566 beta, mu, g, lkl, logLkl = 0.192639427615362 , 3.10305846813686 , 0.000000000000000 , 0.0 , -6167.57086263 beta, mu, g, lkl, logLkl = 0.192639430127376 , 3.10305846565470 , 0.000000000000000 , 0.0 , -6339.1254703 beta, mu, g, lkl, logLkl = 0.192639440127376 , 3.10305846565470 , 0.000000000000000 , 0.0 , -5981.17374362 beta, mu, g, lkl, logLkl = 0.192639430127376 , 3.10305846815471 , 0.000000000000000 , 0.0 , -6157.72285945 beta, mu, g, lkl, logLkl = 0.192639430401962 , 3.10305846565666 , 0.000000000000000 , 0.0 , -6083.80831441 beta, mu, g, lkl, logLkl = 0.192639440401962 , 3.10305846565666 , 0.000000000000000 , 0.0 , -6173.33870907 beta, mu, g, lkl, logLkl = 0.192639430401962 , 3.10305846815666 , 0.000000000000000 , 0.0 , -6051.74194185 (array([ 0.19263888, 12.41223453]), 5959.3602813466023)

If the above is run long enough it goes to (0.0,10.0).


A Complex Demographic Structured Sample

We will generates SFS using Hudson's ms using the scheme described by Fig. 3 on Page 19 of msdoc.pdf and estimate the effective beta-splitting population size.

n=15; NumLoci=100; TrueTheta=10.0; # make sure these agree with `ms n NumLoci -t TrueTheta ...` in the `%sh mc .. ` cell below
N0=1.0; TrueG=0.0; TrueBeta=0.0; # actually TrueBeta is unknown or not applicable
%sh
#ms 30 20 -t 10.0 -I 6 0 14 0 0 16 0 -m 1 2 2.5 -m 2 1 2.5 -m 2 3 2.5 -m 3 2 2.5 -m 4 5 2.5 -m 5 4 2.5 -m 5 6 2.5 -m 6 5 2.5 -em 2.0 3 4 2.5 -em 2.0 4 3 2.5 | sample_statsSFS > output2


%sh head -n 4 /home/user/msdir/output2
96 26 16 54 22 0 14 0 0 71 0 0 0 0 30 13 61 28 2 0 5 31 0 0 0 34 0 0 32 23 2 2 2 6 42 53 0 0 0 0 0 0 35 2 12 2 0 6 100 86 0 0 0 0 0 0
%sh
ms 15 100 -t 10.0 -I 6 0 7 0 0 8 0 -m 1 2 2.5 -m 2 1 2.5 -m 2 3 2.5 -m 3 2 2.5 -m 4 5 2.5 -m 5 4 2.5 -m 5 6 2.5 -m 6 5 2.5 -em 2.0 3 4 2.5 -em 2.0 4 3 2.5 | sample_statsSFS > output2

msSfsDataRaw = np.genfromtxt('/home/user/msdir/output2', delimiter=' ')
msSfsMultiLocusData = [np.insert( np.insert(a, -1, 0., axis=0), 0, 0., axis=0) for a in msSfsDataRaw]
#msSfsMultiLocusData
# the Watterson estimator of theta under panmictic Kingman prior
print (mean([np.sum(msSfsMultiLocusData[j]) for j in range(0,NumLoci)])) / (sum(np.array([1.0/i for i in range(1,n,1)])))
77.8733342835
# Likelihood based on the full list of SFS
# dataset Hudson's Fig 3 - fitting with Beta-splitting Effective Population Size
set_random_seed(9842245)
np.random.seed(int(4115345))

BetaList=[-1.0,0.0,5.0,10.0];
GrowthList=[0.0];
ThetaList=[1,10,100,1000];
results=[]
Reps=1000; # number of particles
for g in GrowthList:
    #GrRates = GetRatesForGrowth(g,n,1000)
    GrRates=[0.,0.];
    for i in range(2,n+1,1):
        GrRates.append(1.0*binomial(i,2))
    for beta in BetaList:
        for theta in ThetaList:
            logLAcrossAllLoci = LklOfBetaThetaGrowthRates(beta,theta/4.0,g,GrRates,n,msSfsMultiLocusData,Reps) # !!! note the theta/4.0 !!!
            #print "mu*4 is Hudson's theta and this TrueTheta = "+str(TrueTheta)
            print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
            results.append([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
# report the most likely values
print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]")
MaxI=0
for i in range(len(results)):
    if results[i][5]>results[MaxI][5]:
       MaxI=i
results[MaxI]
#[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]
#[20, 30, 20, 5.00000000000000, 100, -4912.0378080548035]
#[100, 30, 20, 5.00000000000000, 100, -4699.5032281274443]

Results of a run with the complex Fig.3 for msdoc.pdf chooses a more balanced beta and larger theta.

[1000, 15, 100, -1.00000000000000, 1, -inf]
[1000, 15, 100, -1.00000000000000, 10, -12061.540375880273]
[1000, 15, 100, -1.00000000000000, 100, -6100.9810259876103]
[1000, 15, 100, -1.00000000000000, 1000, -11881.563131517965]
[1000, 15, 100, 0.000000000000000, 1, -inf]
[1000, 15, 100, 0.000000000000000, 10, -11864.359615167663]
[1000, 15, 100, 0.000000000000000, 100, -5795.9160992013267]
[1000, 15, 100, 0.000000000000000, 1000, -11594.056550711328]
[1000, 15, 100, 5.00000000000000, 1, -inf]
[1000, 15, 100, 5.00000000000000, 10, -11736.957816906755]
[1000, 15, 100, 5.00000000000000, 100, -5734.6545990340501]
[1000, 15, 100, 5.00000000000000, 1000, -11523.970285428481]
[1000, 15, 100, 10.0000000000000, 1, -inf]
[1000, 15, 100, 10.0000000000000, 10, -11734.261194892686]
[1000, 15, 100, 10.0000000000000, 100, -5812.3707795154705]
[1000, 15, 100, 10.0000000000000, 1000, -11545.693530758796]
[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]
[1000, 15, 100, 5.00000000000000, 100, -5734.6545990340501]

There is variance in the likelihood estimates which decreases as a function of the number of particles.

[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]
[10, 30, 20, 5.00000000000000, 100.000000000000, -5532.3529474155057]
[10, 30, 20, 5.00000000000000, 100.000000000000, -5218.1004939060304]
[10, 30, 20, 5.00000000000000, 100.000000000000, -5287.3870924684761]
[10, 30, 20, 5.00000000000000, 100.000000000000, -5265.937482150388]
[10, 30, 20, 5.00000000000000, 100.000000000000, -5280.421730323982]

[100, 30, 20, 5.00000000000000, 100.000000000000, -4751.6168398897598]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4677.2325203708433]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4811.3257313336544]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4799.1290494359873]
[100, 30, 20, 5.00000000000000, 100.000000000000, -4739.2569980055923]

[1000, 30, 20, 5.00000000000000, 100.000000000000, -4443.9865339510743]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4484.4008663487266]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4451.5259917605035]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4475.0765597760737]
[1000, 30, 20, 5.00000000000000, 100.000000000000, -4441.5184195452257]
r10=[-5532.3529474155057, -5218.1004939060304, -5287.3870924684761, -5265.937482150388, -5280.421730323982]
r100=[-4751.6168398897598, -4677.2325203708433, -4811.3257313336544, -4799.1290494359873, -4739.2569980055923]
r1000=[-4443.9865339510743, -4484.4008663487266, -4451.5259917605035, -4475.0765597760737, -4441.5184195452257]
print('num of particles, mean log lkh, std loglkh')
print(10,mean(r10),std(r10))
print(100,mean(r100),std(r100))
print(1000,mean(r1000),std(r1000))
num of particles, mean log lkh, std loglkh (10, -5316.83994925288, 123.470797173940) (100, -4755.712227807167, 53.44271101877941) (1000, -4459.301674276321, 19.30074717347509)
Reps=100; beta=5.0; theta=100.0; growth=0.0;
print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]")
for i in range(5):
    logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps)
    print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci] [100, 30, 20, 5.00000000000000, 100.000000000000, -4751.6168398897598] [100, 30, 20, 5.00000000000000, 100.000000000000, -4677.2325203708433] [100, 30, 20, 5.00000000000000, 100.000000000000, -4811.3257313336544] [100, 30, 20, 5.00000000000000, 100.000000000000, -4799.1290494359873] [100, 30, 20, 5.00000000000000, 100.000000000000, -4739.2569980055923]
Reps=1000; beta=5.0; theta=100.0; growth=0.0;
print("[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci]")
for i in range(5):
    logLAcrossAllLoci=LklOfBetaThetaGrowthRates(beta,theta/4.0,growth,GrRates,n,msSfsMultiLocusData,Reps)
    print([Reps,n,NumLoci,beta,theta,logLAcrossAllLoci])
[Reps, n, NumLoci2, betaHat, thetaHat, logLAcrossAllLoci] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4443.9865339510743] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4484.4008663487266] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4451.5259917605035] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4475.0765597760737] [1000, 30, 20, 5.00000000000000, 100.000000000000, -4441.5184195452257]
5b212958-4b2c-465f-95a4-4df0ea57aa52