.packageName <- "MCPDT"
`DSP` <-

function (ped,allele)

#Calculate D statistic for DSP

#ped: affected at row 1, unaffected at row 2

#allele: allele of interest

{

  Dstat <- sum(ped[1,7:8]==allele) - sum(ped[2,7:8]==allele)

  Dstat

}





`DSP.x` <-

function (ped,allele,male)

#Calculate D statistic for DSP of same sex

#ped: affected at row 1, unaffected at row 2

#allele: allele of interest

#male: whether sibs are male 

{

if (male) Dstat <- (ped[1,7]==allele) - (ped[2,7]==allele)

else Dstat <- sum(ped[1,7:8]==allele) - sum(ped[2,7:8]==allele)

Dstat

}





`DtoT` <-

function(D) {

  sum(D)/sqrt(sum(D*D))

}







`MCPDT` <-

function(ped,mcsize,loci=NULL,af=NULL,sexlinked=FALSE) 



#PDT with Monte Carlo using slink

#Input has multiple pedigrees

  

#ped: the name of a standard linkage pedigree file or

#     a matrix/dataframe containing the pedigrees

#loci: the name of a standard linkage loci file

#af: dataframe/matrix of marker allele frequencies,

#          one column for each marker locus

#          If af=NA, it will be estimated from typed founders      

#mcsize: number of Monte Carlo simulations

#sexlinked: whether the markers are on X chromosome



#ped is required. If loci is provided, af is read

#from the file specified. Otherwise, it can be provided directly in the

#function input arguments. If af is not provided in either way, it

#will be estimated from the typed founders' genotypes.

#In case of multiple markers, each marker will be analyzed seperately



{



  marker.name <- NULL

  

  #read in pedigree file if it is provided

  if (is.character(ped)) {

    pedfile <- ped

    ped <- read.table(pedfile,header=F,as.is=T)

    n.loci <- (dim(ped)[2] - 6) / 2

    n.ind <- dim(ped)[1]

  }

  else if(is.data.frame(ped)) {

    n.loci <- (dim(ped)[2] - 6) / 2

    n.ind <- dim(ped)[1]

    marker.name <- names(ped)[5+2*(1:n.loci)] #take marker names from ped

  }

  else if(is.matrix(ped)) {

    n.loci <- (dim(ped)[2] - 6) / 2

    n.ind <- dim(ped)[1]

    if(!is.null(dimnames(ped)))

      marker.name <- dimnames(ped)[[2]][5+2*(1:n.loci)]

  }



  ped <- as.matrix(ped)

  

  #process the pedigree file with makeped, may cause reordering of individuals

  #in each pedigree

  

  ped.make <- makeped(cbind(ped[,1:5],1:n.ind))

    #post makeped format

  ped.make <- cbind(ped.make[,1:9],ped[ped.make[,10],c(6,6+1:(2*n.loci))])

    #pre makeped format

  ped <- ped.make[,c(1:4,8,10,10+1:(2*n.loci))]



  #read in loci file if it is provided

  if (is.character(loci)) {

     locifile <- loci

     loci.raw <- readLines(locifile) #raw input

     loci.raw2 <- strsplit(loci.raw, " ")

     n.loci.2 <- as.integer(loci.raw2[[1]][1])

     if (n.loci != n.loci.2)

       stop("Numbers of loci in pedigree data and marker data don't match")

     sexlinked = (as.integer(loci.raw2[[1]][3]) == 1)

     af <- matrix(0,nrow=2,ncol=n.loci)

     for (i in 1:n.loci) 

       af[,i] <- as.numeric(loci.raw2[[3+i*2]][1:2])

     if ("#" %in% strsplit(loci.raw[4],"") && is.null(marker.name)) {

       marker.name <- character(n.loci)

       for(i in 1:n.loci)

         marker.name[i] <- strsplit(loci.raw[2+i*2],"#")[[1]][2]

     }

   }

  else if (is.null(af)[1]) {

    af <- matrix(0,ncol=n.loci,nrow=2)

    if(sexlinked)

      no.allele <- ped[ped[,3]==0,5] #number of alleles in founders

    else

      no.allele <- 2

    for (i in 1:n.loci) {

      founder.geno <- ped[ped[,3]==0, 6+(i-1)*2+1:2]

      af[1,i] <- sum((founder.geno==1)*no.allele)/

        sum((founder.geno>0)*no.allele)

      af[2,i] <- 1 - af[1,i]

    }

  }

        

  if (is.null(dimnames(af)) && is.null(marker.name)) {

    marker.name <- paste("Marker",1:n.loci)

    dimnames(af) <- list(1:2,marker.name)

  }

  else if(!is.null(dimnames(af)) && is.null(marker.name))

    marker.name <- dimnames(af)[[2]]

    

  pvalue <- matrix(0,ncol=2,nrow=n.loci)

  dimnames(pvalue) <- list(marker.name, c("PDT","MCPDT"))

  

  Tstat <- matrix(0,ncol=2,nrow=n.loci)

  dimnames(Tstat) <- list(marker.name, c("PDT","MCPDT")) 

  

  n.family <- length(unique(ped[,1]))

  n.ind <- dim(ped)[1]

  

  offpos <- which(ped[,3] != 0)

  no.off <- length(offpos)

  offused <- rep(0, no.off)

  

  nufampos <- numeric(0)

  nufampos.fm <- numeric(0) #positions of father and mother of each nuclear fam

  nufam.id <- numeric(0)

  orifam.id <- numeric(0)

  

  i <- 0

  

  repeat {

    off <- offpos[offused==0][1]

    if (is.na(off)) break

    i <- i+1

    fid <- ped[off,3]

    mid <- ped[off,4]

    fpos <- which(ped[,1] == ped[off,1] & ped[,2] == fid)

    mpos <- which(ped[,1] == ped[off,1] & ped[,2] == mid)

    alloff <- which(ped[,1] == ped[off,1] &

                    ped[,3] == fid & ped[,4] == mid)

    offused[match(alloff,offpos)] <- 1

    allpos <- c(fpos,mpos,alloff)

    nufampos.fm <- c(nufampos.fm,length(nufampos)+1:2)

    nufampos <- c(nufampos,allpos)

    nufam.id <- c(nufam.id, rep(i,length(allpos)))

    orifam.id <- c(orifam.id,rep(ped[off,1],length(allpos)))

  }



  n.ind.nuclear <- length(nufampos)

  

#PDT



  for (i in 1:n.loci) {

    if(sexlinked)

      result <- XPDT(ped,i,1)

    else

      result <- PDT(ped,i,1)

    pvalue[i,1] <- result$P

    Tstat[i,1] <- result$Tstat

  }



#MC-PDT 
  if (sum(ped[,7:(6+2*n.loci)]==0)<0.1) { 
      pvalue[,2] <- pvalue[,1]
      Tstat[,2] <- Tstat[,1]
  }
  else {

  

    for (i in 1:n.loci) {

    

      ped.mc <- slink(cbind(ped.make[,c(1:9,10+(i-1)*2+1:2)],1),

                    af[,i],mcsize,sexlinked)

      ped.mc <- cbind(ped.mc[,c(1:4,8)],ped[,6],ped.mc[,9+1:2])

    

      nuclear.ped.mc <-

        ped.mc[rep(n.ind*0:(mcsize-1),each=n.ind.nuclear) + nufampos,]

      nuclear.ped.mc[,1] <-

        rep(0:(mcsize-1)*max(nufam.id),each=n.ind.nuclear) + nufam.id

      nuclear.ped.mc[rep(n.ind.nuclear*0:(mcsize-1),each=length(nufampos.fm))

                   + nufampos.fm, 3:4] <- 0

      nuclear.ped.mc <- cbind(nuclear.ped.mc,

                            rep(0:(mcsize-1)*max(orifam.id),

                                each=n.ind.nuclear) + orifam.id)

    

     if (sexlinked)

        result <- XPDT.mc(nuclear.ped.mc,mcsize=mcsize)

      else

        result <- PDT.mc(nuclear.ped.mc,mcsize=mcsize)

      pvalue[i,2] <- result$P

      Tstat[i,2] <- result$Tstat

    }
  }



  list(pvalue=pvalue, Tstat=Tstat, mcsize=mcsize, af=af)

}





`PDT` <-

function (peds,locus,allele)

#Calculate the PDT statistic for general pedigrees

#No parental genotype reconstructions

#peds: pedigree data, can have multiple pedigrees and loci

#locus: position of the marker of interest

#allele: allele of interest 

{

#extract pedigree structure and marker of interest

  peds <- peds[,c(1:6,(5:6)+locus*2)]

#extract family ids

  familyid <- unique(peds[,1])

  nfamily <- length(familyid)

#statistic for each family

  Dstat <- rep(0,nfamily)

  for (i in 1:nfamily) {

    #data of one family

    ped <- peds[peds[,1]==familyid[i],]

    #offspring position (easier to code than using offspring id)

    offpos <- which(ped[,3]!=0)

    noff <- length(offpos)

    offused <- rep(0,noff)

    repeat {

	#first unused offspring

      off <- offpos[offused==0][1]

      if (is.na(off)) break

	#father id and mother id

      fid <- ped[off,3]

      mid <- ped[off,4]

      fpos <- which(ped[,2]==fid)

      mpos <- which(ped[,2]==mid)

        #nuclear family

      alloff <- which(ped[,3]==fid & ped[,4]==mid)

      offused[match(alloff,offpos)] <- 1

      allpos <- c(fpos,mpos,alloff)

      Dstat[i] <- Dstat[i] + nufam(ped[allpos,],allele)

    }

  }

#Statisitc for all families

  Tstat <- sum(Dstat)/sqrt(sum(Dstat*Dstat))

  Pvalue <- pnorm(Tstat,0,1,lower.tail=F)

  Pvalue <- 2*min(Pvalue,1-Pvalue) # 2-sided p-value

  list(D=data.frame(family=familyid,Dstat=Dstat),Tstat=Tstat,P=Pvalue)

}





`PDT.mc` <-

function(ped,allele=NULL,mcsize=1)



  #calculate PDT statistics and pvalues for complete pedigree data with no missing genotypes, which may have multiple Monte Carlo samples with the same affection status structure.

  #ped: pedigree data consisting with nuclear families with no missing data, the last column is the original family id, the first column is the nuclear family id, remaining columns are in standard linkage format.

  #allele: alleles at each locus to calculate the XPDT statistcs. The default is allele 1 for each locus.

  #mcsize: Monte Carlo sample size.



{

  totalsize <- dim(ped)[1]

  onesamplesize <- totalsize/mcsize

  ped1 <- ped[1:onesamplesize,] #first sample

  n.family <- length(unique(ped1[,dim(ped)[2]]))

  n.nuclear <- length(unique(ped1[,1]))

  n.marker <- (dim(ped)[[2]]-7)/2

  if (is.null(allele)) allele <- rep(1,n.marker)



  aff.table <- xtabs(~ped1[,1]+ped1[,6],subset=ped1[,3]!="0")

      #use "0" instead of 0 in case the individual ids are characters

  weight <- matrix(0,nrow=n.nuclear,ncol=3)

  dimnames(weight) <- 

    list(NULL,c("parent","uaff","aff"))

  weight[,1] <- aff.table[,"2"]*(-1)

  weight[,2] <- aff.table[,"2"]*(-1)

  if (any(ped1[,6]==1 & ped1[,3] != "0"))

    weight[,3] <- 2+aff.table[,"1"]

  else

    weight[,3] <- 2



  Dstat <- matrix(0,nrow=n.family,ncol=n.marker)

  

  for (i in 1:n.marker) {

    n.allele <- (ped[,5+i*2]==allele[i]) + (ped[,6+i*2]==allele[i])

    n.allele <- n.allele * !(ped[,6]==0 & ped[,3] != "0")

    #offspring with unknown affection status

    type <- (ped[,3]!="0")*ped[,6]+1

    #parent=0*?+1=1, unaff off=1*1+1=2, aff off=1*2+1=3

    D.raw <- xtabs(n.allele*weight[cbind(rep(match(ped1[,1],

                   dimnames(aff.table)[[1]]),mcsize),type)]

                   ~ped[,dim(ped)[2]])

    Dstat[,i] <- apply(matrix(D.raw,ncol=mcsize),1,mean)

  }

                   

  Tstat <- apply(Dstat,2,DtoT)

  pvalue <- 2*(1-pnorm(abs(Tstat)))



  list(D=Dstat,Tstat=Tstat,P=pvalue)

}





`XPDT` <-

function (peds,locus,allele)

#Calculate the XPDT statistic for general pedigrees for X-linked markers

#No parental genotype reconstructions

#peds: pedigree data, can have multiple pedigrees and loci

#locus: position of the marker of interest

#allele: allele of interest 

{

#extract pedigree structure and marker of interest

peds <- peds[,c(1:6,(5:6)+locus*2)]

#extract family ids

familyid <- unique(peds[,1])

nfamily <- length(familyid)

#statistic for each family

Dstat <- rep(0,nfamily)

n_triad <- rep(0,nfamily)

n_DSP <- rep(0,nfamily)

for (i in 1:nfamily) {

    #data of one family

    ped <- peds[peds[,1]==familyid[i],]

    #offspring position (easier to code than using offspring id)

    offpos <- which(ped[,3]!=0)

    noff <- length(offpos)

    offused <- rep(0,noff)

    repeat {

	#first unused offspring

	off <- offpos[offused==0][1]

	if (is.na(off)) break

	#father id and mother id

	fid <- ped[off,3]

	mid <- ped[off,4]

	fpos <- which(ped[,2]==fid)

	mpos <- which(ped[,2]==mid)

	#nuclear family

	alloff <- which(ped[,3]==fid & ped[,4]==mid)

#	print(c(familyid[i],fpos,mpos))

	offused[match(alloff,offpos)] <- 1

	allpos <- c(fpos,mpos,alloff)

	nufamily <- nufam.x(ped[allpos,],allele)

	Dstat[i] <- Dstat[i] + nufamily$D

	n_triad[i] <- n_triad[i] + nufamily$n_triad

	n_DSP[i] <- n_DSP[i] + nufamily$n_DSP

	next

	}

    }

#Statisitc for all families

Tstat <- sum(Dstat)/sqrt(sum(Dstat*Dstat))

Pvalue <- pnorm(Tstat,0,1,lower.tail=F)

Pvalue <- 2*min(Pvalue,1-Pvalue) # 2-sided p-value

list(Dstat=Dstat,Tstat=Tstat,P=Pvalue)

}



`XPDT.mc` <-

function(ped,allele=NULL,mcsize=1)



  #calculate XPDT statistics and pvalues for complete pedigree data with no missing genotypes, which may have multiple Monte Carlo samples with the same affection status structure.

  #ped: pedigree data consisting with nuclear families with no missing data, the last column is the original family id, the first column is the nuclear family id, remaining columns are in standard linkage format.

  #allele: alleles at each locus to calculate the XPDT statistcs. The default is allele 1 for each locus.

  #mcsize: Monte Carlo sample size.



{

  totalsize <- dim(ped)[1]

  onesamplesize <- totalsize/mcsize

  ped1 <- ped[1:onesamplesize,] #first sample

  n.family <- length(unique(ped1[,dim(ped)[2]]))

  n.nuclear <- length(unique(ped1[,1]))

  n.marker <- (dim(ped)[[2]]-7)/2

  if (is.null(allele)) allele <- rep(1,n.marker)



  #add an artificial family to guanratee there are affected and unaffected

  #male and female offspring in the pedigrees

  ped2 <- rbind(0,0,0,0,ped1)

  ped2[1:4,3:4] <- 1

  ped2[1,5:6] <- c(1,1)

  ped2[2,5:6] <- c(1,2)

  ped2[3,5:6] <- c(2,1)

  ped2[4,5:6] <- c(2,2)

  

  aff.table <- xtabs(~ped2[,1]+ped2[,5]+ped2[,6],subset=ped2[,3]!="0")

  aff.table <- aff.table[-1,,] #discard the artificial family

  

  weight <- matrix(0,nrow=n.nuclear,ncol=6)

  dimnames(weight) <- 

    list(NULL,c("father","mother","ua_m","ua_f","a_m","a_f"))

  weight[,1] <- aff.table[,"2","2"]*(-2)

  weight[,2] <- (aff.table[,"1","2"]+aff.table[,"2","2"])*(-1)

  weight[,3] <- aff.table[,"1","2"]*(-1)

  weight[,4] <- aff.table[,"2","2"]*(-1)

  weight[,5] <- 2+aff.table[,"1","1"]

  weight[,6] <- 2+aff.table[,"2","1"]



  Dstat <- matrix(0,nrow=n.family,ncol=n.marker)

  

  for (i in 1:n.marker) {

    n.allele <- ((ped[,5+i*2]==allele[i]) + (ped[,6+i*2]==allele[i]))*ped[,5]/2

    n.allele <- n.allele * !(ped[,6]==0 & ped[,3] > 0)

    #offspring with unknown affection status

    type <- (ped[,3]>0)*ped[,6]*2+ped[,5]

    D.raw <- xtabs(n.allele*weight[cbind(rep(match(ped1[,1],

                   dimnames(aff.table)[[1]]),mcsize),type)]

                   ~ped[,dim(ped)[2]])

    Dstat[,i] <- apply(matrix(D.raw,ncol=mcsize),1,mean)

  }

                   

  Tstat <- apply(Dstat,2,DtoT)

  pvalue <- 2*(1-pnorm(abs(Tstat)))



  list(Dstat=Dstat,Tstat=Tstat, P=pvalue)

}





`makeped` <-

function(ped)



{

  #call C routine to carry out makeped



  ped <- as.matrix(ped)

  nind <- dim(ped)[1]

  pedout <- matrix(0,nrow=nind, ncol=dim(ped)[2]+4)



  pedout[,] <-

    .C("makepedc",as.integer(nind),as.integer(ped),

       out=as.integer(pedout),PACKAGE="makeped")$out

  

  pedout

}





`nufam` <-

function (ped,allele)

#Calculate D statisic for a single nuclear family

#No parental genotype reconstrutions 

#ped: data for one nuclear family with one marker locus, 

#     father at row 1, mother at row 2

#allele: allele code of interest

{

#parents and offspring postions

  fpos <- 1

  mpos <- 2

  offpos <- 3:dim(ped)[1]

  npos <- length(offpos)

  Dstat <- 0

  n_triad <- 0

  n_DSP <- 0 

  if (ped[mpos,7] !=0 && ped[mpos,8] !=0 &&

      ped[fpos,7] !=0 && ped[fpos,8] !=0) { 

    for (i in 1:npos) {

      if (ped[offpos[i],6]!=2) next # unaffected or unknown

      if (ped[offpos[i],7]==0) next # untyped

      D_triad <- triad(ped[c(fpos,mpos,offpos[i]),],allele)

      Dstat <- Dstat + D_triad

    }

  }

#pick up DSPs

  aff_o <- 2 + which(ped[offpos,6]==2 & ped[offpos,7] != 0)

  unaff_o <- 2 + which(ped[offpos,6]==1 & ped[offpos,7] !=0)

  n_aff_o <- length(aff_o)

  n_unaff_o <- length(unaff_o)

  if (n_aff_o > 0 && n_unaff_o > 0) {

    for (i in 1:n_aff_o) {

      for (j in 1:n_unaff_o) {

        D_DSP <- DSP(ped[c(aff_o[i],unaff_o[j]),],allele)

        Dstat <- Dstat + D_DSP

      }

    }

  }

  Dstat

}





`nufam.x` <-

function (ped,allele)

#Calculate D statisic for a single nuclear family

#No parental genotype reconstrutions 

#ped: data for one nuclear family with one marker locus on X chromosome

#     father at row 1, mother at row 2

#allele: allele code of interest

{

#parents and offspring postions

fpos <- 1

mpos <- 2

offpos <- 3:dim(ped)[1]

npos <- length(offpos)

Dstat <- 0

n_triad <- 0

n_DSP <- 0

#only use family triads if the mother is heterozygous for the marker locus 

#and has the allele of interest

if (ped[mpos,7] !=0 && ped[mpos,8] !=0 

   && ped[mpos,7] != ped[mpos,8]   

   && ( ped[mpos,7] == allele || ped[mpos,8] == allele)) {

   for (i in 1:npos) {

	if (ped[offpos[i],6]!=2) next # unaffected or unknown

	if (ped[offpos[i],7]==0) next # untyped

	D_triad <- triad.x(ped[c(fpos,mpos,offpos[i]),],allele)

	if (D_triad != 0) {

	   Dstat <- Dstat + D_triad

	   n_triad <- n_triad + 1

	   }

#	print(Dstat)

	}

   }

#pick up DSPs withing each gender

aff_m <- 2 + which(ped[offpos,5]==1 & ped[offpos,6]==2 & ped[offpos,7] != 0)

unaff_m <- 2 + which(ped[offpos,5]==1 & ped[offpos,6]==1 & ped[offpos,7] !=0)

aff_f <- 2 + which(ped[offpos,5]==2 & ped[offpos,6]==2 & ped[offpos,7]!=0)

unaff_f <- 2 + which(ped[offpos,5]==2 & ped[offpos,6]==1 & ped[offpos,7]!=0)

n_aff_m <- length(aff_m)

n_unaff_m <- length(unaff_m)

n_aff_f <- length(aff_f)

n_unaff_f <- length(unaff_f)

if (n_aff_m > 0 && n_unaff_m > 0) {

   for (i in 1:n_aff_m) {

       for (j in 1:n_unaff_m) {

	   D_DSP <- DSP.x(ped[c(aff_m[i],unaff_m[j]),],allele,male=T)

	   if (D_DSP != 0) {

	      Dstat <- Dstat + D_DSP

	      n_DSP <- n_DSP + 1

	      }

#	   print(c(Dstat,n_DSP))

	   }

       }

   }

if (n_aff_f > 0 && n_unaff_f > 0) {

   for (i in 1:n_aff_f) {

       for (j in 1:n_unaff_f) {

	   D_DSP <- DSP.x(ped[c(aff_f[i],unaff_f[j]),],allele,male=F)

	   if (D_DSP != 0) {

	      Dstat <- Dstat + D_DSP

	      n_DSP <- n_DSP + 1

	      }

#         print(c(Dstat,n_DSP))

	   }

       }

   }

list(D=Dstat,n_triad=n_triad,n_DSP=n_DSP)

}



`slink` <- function(ped,af,mcsize,sexlinked=FALSE,seed=NULL)
  
{
  ##call c routine to carry out MC simulation using slink
  ##single marker locus

  max.ped <- 500 #limit on the number of pedigrees in slink
  ped <- as.matrix(ped)
  nind <- dim(ped)[1] #number of individuals
  nped <- length(unique(ped[,1])) #number of pedigrees
  nlocus <- 1 #single marker
  if(is.null(seed)) seed <- round(runif(1,1,30000)) #random seed
  if(sexlinked) sexlinked <- 1
  else sexlinked <- 0

  pedout <- matrix(0,nrow=nind*mcsize,ncol=dim(ped)[2])
  ped.id <- unique(ped[,1])
  for (i in 1:(nped %/% max.ped+1)) {
    if (i*max.ped >= nped) 
      ped1.id <- ped.id[((i-1)*max.ped+1):nped]
    else
      ped1.id <- ped.id[((i-1)*max.ped+1):(i*max.ped)]
    ped1.idx <- ped[,1] %in% ped1.id
    ped1 <- ped[ped1.idx,]
    nped1 <- length(ped1.id)
    nind1 <- nrow(ped1)

    pedout.idx <- rep(ped1.idx,times=mcsize)
    pedout[pedout.idx,] <- 
    .C("slinkc",as.integer(nind1),as.integer(nped1),as.integer(nlocus),
       as.integer(ped1),as.integer(sexlinked),as.double(af),
       as.integer(seed),as.integer(mcsize),as.integer(0),
       as.integer(0),out=as.integer(pedout[pedout.idx,]),PACKAGE="slink")$out
  }
  return(pedout)
}


`triad` <-

function (ped,allele)

#calculate D statistic for one family triad and one marker locus

#assume everyone is typed

#ped: father at row 1, mother at row 2, affected offspring at row 3

#allele: allele of interest 

{

  Dstat <- 2*sum(ped[3,c(7,8)]==allele) - sum(ped[1:2,7:8]==allele)

  Dstat

}





`triad.x` <-

function (ped,allele)

#calculate D statistic for one family triad and one marker locus

#assume mother is heterozygote and has the allele of interest

#ped: father at row 1, mother at row 2, affected offspring at row 3

#allele: allele of interest 

{

#calculate transmission from the mother

#male offspring, only needs typed mother

if (ped[3,5]==1 && ped[3,7] == allele) Dstat <- 1

else if (ped[3,5]==1 && ped[3,7] != allele) Dstat <- -1 

#female offspring, needs both typed mother and typed father

else if (ped[1,7] != 0) {

     transmit <- sum(ped[3,c(7,8)]==allele) - sum(ped[1,7]==allele)

     Dstat <- transmit - (1 - transmit)

     }

else Dstat <- 0

Dstat

}





`PPAT` <-

function(ped) {



  #Pedigree Parental-asymmetry test (PPAT) for general pedigrees

  #Consider all the nuclear families with both parents



  family.id <- unique(ped[,1])

  n.fam <- length(family.id)

  pat.ab <- rep(0,n.fam)

  

  for (i in 1:n.fam) {

    

    fam <- family.id[i]

    family1 <- which(ped[,1]==fam)

    informative <- NULL



    for (ind in family1) {

      

      if (ped[ind,3] == 0 || ped[ind,6] == 1 || ped[ind,7] == 0) next

      fid <- which(ped[,1]==fam & ped[,2]==ped[ind,3])

      mid <- which(ped[,1]==fam & ped[,2]==ped[ind,4])

      if (ped[fid, 7] == 0) next

      if (ped[mid, 7] == 0) next

      if (ped[fid,7] + ped[fid,8] != ped[mid,7] + ped[mid,8])

      informative <- c(informative, ind)

    }

    if (length(informative) == 0) next

    for (j in 1:length(informative)) {

      ind <- informative[j]

      if (ped[ind,7] == ped[ind,8]) next

      fid <- which(ped[,1]==fam & ped[,2]==ped[ind,3])

      mid <- which(ped[,1]==fam & ped[,2]==ped[ind,4])     

      if (ped[fid,7] + ped[fid,8] - ped[mid,7] - ped[mid,8] < 0)

        pat.ab[i] <- pat.ab[i] + 1

      else if (ped[fid,7] + ped[fid,8] - ped[mid,7] - ped[mid,8] > 0)

        pat.ab[i] <- pat.ab[i] - 1

    }

  }

  pat <- DtoT(pat.ab)

  p.pat <- 2*(1-pnorm(abs(pat)))

  list(pvalue=p.pat,pat=pat)

}





`MCPPAT` <-

function(ped,mcsize,loci=NULL,af=NULL,sexlinked=FALSE) 



#MCPPAT with Monte Carlo using slink 

#Input has multiple pedigrees

  

#ped: the name of a standard linkage pedigree file or

#     a matrix/dataframe containing the pedigrees

#loci: the name of a standard linkage loci file

#af: dataframe/matrix of marker allele frequencies,

#          one column for each marker locus

#          If af=NA, it will be estimated from typed founders      

#mcsize: number of Monte Carlo simulations

#sexlinked: whether the markers are on X chromosome (doesn't actually
#           apply to MCPRAT since it only deals with autosomal markers



#ped is required. If loci is provided, af is read

#from the file specified. Otherwise, it can be provided directly in the

#function input arguments. If af is not provided in either way, it

#will be estimated from the typed founders' genotypes.

#In case of multiple markers, each marker will be analyzed seperately



{



  marker.name <- NULL

  

  #read in pedigree file if it is provided

  if (is.character(ped)) {

    pedfile <- ped

    ped <- read.table(pedfile,header=F,as.is=T)

    n.loci <- (dim(ped)[2] - 6) / 2

    n.ind <- dim(ped)[1]

  }

  else if(is.data.frame(ped)) {

    n.loci <- (dim(ped)[2] - 6) / 2

    n.ind <- dim(ped)[1]

    marker.name <- names(ped)[5+2*(1:n.loci)] #take marker names from ped

  }

  else if(is.matrix(ped)) {

    n.loci <- (dim(ped)[2] - 6) / 2

    n.ind <- dim(ped)[1]

    if(!is.null(dimnames(ped)))

      marker.name <- dimnames(ped)[[2]][5+2*(1:n.loci)]

  }



  ped <- as.matrix(ped)

  

  #process the pedigree file with makeped, may cause reordering of individuals

  #in each pedigree

  

  ped.make <- makeped(cbind(ped[,1:5],1:n.ind))

    #post makeped format

  ped.make <- cbind(ped.make[,1:9],ped[ped.make[,10],c(6,6+1:(2*n.loci))])

    #pre makeped format

  ped <- ped.make[,c(1:4,8,10,10+1:(2*n.loci))]



  #read in loci file if it is provided

  if (is.character(loci)) {

     locifile <- loci

     loci.raw <- readLines(locifile) #raw input

     loci.raw2 <- strsplit(loci.raw, " ")

     n.loci.2 <- as.integer(loci.raw2[[1]][1])

     if (n.loci != n.loci.2)

       stop("Numbers of loci in pedigree data and marker data don't match")

     sexlinked = (as.integer(loci.raw2[[1]][3]) == 1)

     af <- matrix(0,nrow=2,ncol=n.loci)

     for (i in 1:n.loci) 

       af[,i] <- as.numeric(loci.raw2[[3+i*2]][1:2])

     if ("#" %in% strsplit(loci.raw[4],"") && is.null(marker.name)) {

       marker.name <- character(n.loci)

       for(i in 1:n.loci)

         marker.name[i] <- strsplit(loci.raw[2+i*2],"#")[[1]][2]

     }

   }

  else if (is.null(af)[1]) {

    af <- matrix(0,ncol=n.loci,nrow=2)

    if(sexlinked)

      no.allele <- ped[ped[,3]==0,5] #number of alleles in founders

    else

      no.allele <- 2

    for (i in 1:n.loci) {

      founder.geno <- ped[ped[,3]==0, 6+(i-1)*2+1:2]

      af[1,i] <- sum((founder.geno==1)*no.allele)/

        sum((founder.geno>0)*no.allele)

      af[2,i] <- 1 - af[1,i]

    }

  }

        

  if (is.null(dimnames(af)) && is.null(marker.name)) {

    marker.name <- paste("Marker",1:n.loci)

    dimnames(af) <- list(1:2,marker.name)

  }

  else if(!is.null(dimnames(af)) && is.null(marker.name))

    marker.name <- dimnames(af)[[2]]

    

  pvalue <- matrix(0,ncol=4,nrow=n.loci)

  dimnames(pvalue) <- list(marker.name, c("PDT","MCPDT","PPAT","MCPPAT"))

  

  Tstat <- matrix(0,ncol=4,nrow=n.loci)

  dimnames(Tstat) <- list(marker.name, c("PDT","MCPDT","PPAT","MCPPAT")) 

  

  n.family <- length(unique(ped[,1]))

  n.ind <- dim(ped)[1]

  

  offpos <- which(ped[,3] != 0)

  no.off <- length(offpos)

  offused <- rep(0, no.off)

  

  nufampos <- numeric(0)

  nufampos.fm <- numeric(0) #positions of father and mother of each nuclear fam

  nufam.id <- numeric(0)

  orifam.id <- numeric(0)

  

  i <- 0

  

  repeat {

    off <- offpos[offused==0][1]

    if (is.na(off)) break

    i <- i+1

    fid <- ped[off,3]

    mid <- ped[off,4]

    fpos <- which(ped[,1] == ped[off,1] & ped[,2] == fid)

    mpos <- which(ped[,1] == ped[off,1] & ped[,2] == mid)

    alloff <- which(ped[,1] == ped[off,1] &

                    ped[,3] == fid & ped[,4] == mid)

    offused[match(alloff,offpos)] <- 1

    allpos <- c(fpos,mpos,alloff)

    nufampos.fm <- c(nufampos.fm,length(nufampos)+1:2)

    nufampos <- c(nufampos,allpos)

    nufam.id <- c(nufam.id, rep(i,length(allpos)))

    orifam.id <- c(orifam.id,rep(ped[off,1],length(allpos)))

  }



  n.ind.nuclear <- length(nufampos)

  



#PDT and PPAT



  for (i in 1:n.loci) {



  #PDT



    if(sexlinked) {

      result <- XPDT(ped,i,1)
      pvalue[i,1] <- result$P

      Tstat[i,1] <- result$Tstat
      pvalue[i,3] <- 0

      Tstat[i,3] <- 0
    }

    else {

      result <- PDT(ped,i,1)

      pvalue[i,1] <- result$P

      Tstat[i,1] <- result$Tstat



  #PPAT



      peds <- ped[,c(1:6,(5:6)+i*2)]

      result <- PPAT(peds)

      pvalue[i,3] <- result$pvalue

      Tstat[i,3] <- result$pat
    }  

  }





#MCPDT and MCPPAT
  if (sum(ped[,7:(6+2*n.loci)]==0)<0.1) { #no missing genotypes
      pvalue[,2] <- pvalue[,1]
      Tstat[,2] <- Tstat[,1]
      pvalue[,4] <- pvalue[,3]
      Tstat[,4] <- Tstat[,3]
  }
  else {

  

    for (i in 1:n.loci) {

    

      ped.mc <- slink(cbind(ped.make[,c(1:9,10+(i-1)*2+1:2)],1),

                    af[,i],mcsize,sexlinked)

      ped.mc <- cbind(ped.mc[,c(1:4,8)],ped[,6],ped.mc[,9+1:2])

    

      nuclear.ped.mc <-

        ped.mc[rep(n.ind*0:(mcsize-1),each=n.ind.nuclear) + nufampos,]

      nuclear.ped.mc[,1] <-

        rep(0:(mcsize-1)*max(nufam.id),each=n.ind.nuclear) + nufam.id

      nuclear.ped.mc[rep(n.ind.nuclear*0:(mcsize-1),each=length(nufampos.fm))

                   + nufampos.fm, 3:4] <- 0

      nuclear.ped.mc <- cbind(nuclear.ped.mc,

                            rep(0:(mcsize-1)*max(orifam.id),

                                each=n.ind.nuclear) + orifam.id)

    

      if (sexlinked) {

        result <- XPDT.mc(nuclear.ped.mc,mcsize=mcsize)
        pvalue[i,2] <- result$P.MCPDT

        Tstat[i,2] <- result$Tstat.MCPDT

        pvalue[i,4] <- 0

        Tstat[i,4] <- 0
      }

      else {

        result <- PPAT.mc(nuclear.ped.mc,mcsize=mcsize)

        pvalue[i,2] <- result$P.MCPDT

        Tstat[i,2] <- result$Tstat.MCPDT

        pvalue[i,4] <- result$P.MCPPAT

        Tstat[i,4] <- result$Tstat.MCPPAT
      }

    }
  }



  list(pvalue=pvalue, Tstat=Tstat, mcsize=mcsize, af=af)

}





`PPAT.mc` <-

function(ped,allele=NULL,mcsize=1)



  #calculate PAT statistics and pvalues for complete pedigree data with no missing genotypes, which may have multiple Monte Carlo samples with the same affection status structure.

  #ped: pedigree data consisting with nuclear families with no missing data, the last column is the original family id, the first column is the nuclear family id, remaining columns are in standard linkage format.

  #allele: alleles at each locus to calculate the PAT statistcs. The default is allele 1 for each locus.

  #mcsize: Monte Carlo sample size.



{

  

#MCPPAT



  onesamplesize <- (dim(ped)[1])/mcsize

  ped1 <- ped[1:onesamplesize,] #first sample

  n.family <- length(unique(ped1[,dim(ped)[2]]))

  n.marker <- (dim(ped)[[2]]-7)/2

  informative <- NULL

  ncol.ped <- dim(ped)[2]

  ped.id.whole <- unique(ped[,ncol.ped])



  D.raw <- matrix(0,nrow=n.family*mcsize,ncol=n.marker)

  Dstat <- matrix(0,nrow=n.family,ncol=n.marker)



  father.index <- match(unique(ped[,1]),ped[,1]) #indexes of fathers

  no.ind.nufam <- as.vector(table(ped[,1]))

  #number of inds in each nuclear family

  

  for (i in 1:n.marker) {



    PA.nufam <- - sign(ped[father.index,5+i*2] + ped[father.index,6+i*2] -

                       ped[father.index+1,5+i*2] - ped[father.index+1,6+i*2])

    #Parental asymmetry for each unclear family



    PA.all <- rep(PA.nufam,no.ind.nufam)

    #Parental asymmetry spreaded to the same length as the number

    #of individuals in all nuclear families



    informative.all <- ped[,3]!=0 & ped[,6]==2 & ped[,5+i*2]+ped[,6+i*2]==3

    #informativeness of all individuals with those of

    #the affected offspring with heterozygous genotype being 1(TRUE)



    PAT.all <- PA.all * informative.all

    #PAT stats for all individuals (with those of parents being 0)



    D.raw[,i] <- xtabs(PAT.all~ped[,ncol.ped])

    Dstat[,i] <- apply(matrix(D.raw,ncol=mcsize),1,mean)

  }                   

  Tstat <- apply(Dstat,2,DtoT)

  pvalue <- 2*(1-pnorm(abs(Tstat)))

  

#MCPDT



  n.nuclear <- length(unique(ped1[,1]))

  if (is.null(allele)) allele <- rep(1,n.marker)



  aff.table <- xtabs(~ped1[,1]+ped1[,6],subset=ped1[,3]!="0")

      #use "0" instead of 0 in case the individual ids are characters

  weight <- matrix(0,nrow=n.nuclear,ncol=3)

  dimnames(weight) <- 

    list(NULL,c("parent","uaff","aff"))

  weight[,1] <- aff.table[,"2"]*(-1)

  weight[,2] <- aff.table[,"2"]*(-1)

  if (any(ped1[,6]==1 & ped1[,3] != "0"))

    weight[,3] <- 2+aff.table[,"1"]

  else

    weight[,3] <- 2



  Dstat <- matrix(0,nrow=n.family,ncol=n.marker)

  

  for (i in 1:n.marker) {

    n.allele <- (ped[,5+i*2]==allele[i]) + (ped[,6+i*2]==allele[i])

    n.allele <- n.allele * !(ped[,6]==0 & ped[,3] != "0")

    #offspring with unknown affection status

    type <- (ped[,3]!="0")*ped[,6]+1

    #parent=0*?+1=1, unaff off=1*1+1=2, aff off=1*2+1=3

    D.raw <- xtabs(n.allele*weight[cbind(rep(match(ped1[,1],

                   dimnames(aff.table)[[1]]),mcsize),type)]

                   ~ped[,dim(ped)[2]])

    Dstat[,i] <- apply(matrix(D.raw,ncol=mcsize),1,mean)

  }



  Tstat.MCPDT <- apply(Dstat,2,DtoT)

  pvalue.MCPDT <- 2*(1-pnorm(abs(Tstat.MCPDT)))



  list(Tstat.MCPPAT=Tstat, P.MCPPAT=pvalue, P.MCPDT=pvalue.MCPDT, Tstat.MCPDT=Tstat.MCPDT)

}

