# likelihood function getLike = function(x){ #takes vector of length 2: (allele freq, count); returns sum log ifelse(x[1] > 0 & x[1] < 1, return(x[2]*log(x[1]) + (2-x[2])*log(1-x[1])), 2*log(1e-4)); } #structure algorithm 1 structure = function(data, numdraws, numskip, numburn, samplesToGuess, K, Z){ nsamples = nrow(data); nsnps = ncol(data); draws = 0; Z.draws = array(,dim=0); #initialize to be empty draw = numburn; #draw is a counter that tells us when to keep a sample P=matrix(0,nr=K,nc=ncol(data)) minp = 1e-16; require(tcltk); #used for progress bar below pb = txtProgressBar(min = 0, max = numdraws, style = 3) minz = 1e-4; while(draws < numdraws){ ZP=matrix(,nr=length(samplesToGuess), nc=K); for(k in 1:K){ P[k,] = apply(data, 2, function(x){ return(sum(x[which(Z==k)], na.rm=T))}); P[k,] = rbeta(nsnps, 1+P[k,], 1+2*sum(Z==k)-P[k,]) if(1){ foo=apply(rbind(P[k,], data[samplesToGuess,]), 2, function(x){ sapply(x[2:length(x)], function(y){ getLike(c(x[1],y)) }) }) foo2=apply(foo,1,sum) ZP[,k]=exp(foo2) } else{ for(i in 1:length(samplesToGuess)){ ZP[i,k] = exp(sum(apply(rbind(P[k,], data[samplesToGuess[i],]), 2, function(x){ getLike(x) }))); } } } ZP = t(apply(ZP[1:length(samplesToGuess),], 1, function(x){ x/sum(x)})); Z[samplesToGuess] = apply(ZP[1:length(samplesToGuess),], 1, function(x){ sample(1:k, size=1, prob=x) }); #cat("\n",79,": ",ZP[4,],"\t",Z[79],"\n"); #cat(80,": ",ZP[5,],"\t",Z[80],"\n"); draw = draw-1; if(draw <= 0){ Z.draws = rbind(Z.draws, Z); #cat("\n",draw,"\t",Z[samplesToGuess],"\n") flush.console(); draws = draws+1; draw = numskip+1; setTxtProgressBar(pb, draws) flush.console(); } } return(Z.draws); } {#read data from file args = commandArgs(trailingOnly=TRUE) data=read.table(args[1],header=FALSE) labels=data[,1] data=data[,2:length(data[,1])] samplesToGuess=c(76:nrow(data)) K=3 Z = c(rep(1,25), rep(2,25), rep(3,25), sample(1:K, size=nrow(data)-75, replace=T)); ptm <- proc.time() Z=structure(data, 200, 10, 10, samplesToGuess, K, Z) cat("\nruntime=",(proc.time() - ptm)[1],"s\n", sep="") apply(Z,1,mean) apply(Z,2,mean) for(i in samplesToGuess){ cat(i,"\t"); for(k in 1:K){ cat(mean(Z[,i]==k),"\t"); } cat("\n"); } }