Quantcast
Channel: グラフ - 廿TT
Viewing all articles
Browse latest Browse all 123

ggplot2 などのモダンなパッケージを使わずにトピックモデルのパラメータを可視化したい

$
0
0

腸内細菌のデータを使います。

説明は気が向いたら書きます。

f:id:abrahamcow:20200229083037p:plain

f:id:abrahamcow:20200229083117p:plain

library(curatedMetagenomicData)
plot.minibarTable <-function(tab,layoutMat,leftmargin=15, col="black"){
  oldpar <- graphics::par(no.readonly =TRUE)
  N <- length(tab)
  ran <-c(0,max(sapply(tab,function(x)max(x$value))))
  graphics::par(mar = c(1.5, leftmargin,1.5,0), oma = rep(1,4))
  graphics::layout(mat = layoutMat, respect =FALSE)for(i in1:N){
    tmpy <- tab[[i]]$value
    graphics::plot.default(rev(tmpy),1:length(tmpy),
                           xlim = ran,
                           type ="p",pch=16,
                           xaxt="n", yaxt="n",
                           xlab ="", ylab ="", 
                           frame.plot =FALSE,main = paste("topic",i),
                           col=col)
    graphics::segments(numeric(length(tmpy)),1:length(tmpy),rev(tmpy),1:length(tmpy), pch =16, cex =0.9,col=col)
    graphics::axis(side=2,at=1:length(tmpy),labels = tab[[i]]$name,las=2,lwd=0)
    abline(v=0)if(i %% nrow(layoutMat)==0| i==N){
      graphics::axis(side=1,at=format(ran,digits =2), lwd=0, lwd.ticks =1)}}
  graphics::par(oldpar)}
annotationBarplot <-function(W,annotation,hilight.pos=FALSE, hilight.col="gray10",cols=NULL){
  u <- unique(annotation)
  N <- nrow(W)
  bp <- barplot(W,space =0,plot =FALSE)if(is.null(cols)){
    cols <- grey.colors(N)if(hilight.pos[1]){
      cols <- rep("gray90",N)}}
  cols[hilight.pos]<- hilight.col
  bp <- barplot(W,plot=FALSE)
  bp <- barplot(W,xlim = c(0,max(bp)+12),legend.text =1:nrow(W), col = cols,
                args.legend =list(x ="topright", bty ="n"),border = cols,axes =FALSE)
  axis(2,at=c(0,1),las=2)for(i in1:length(u)){
    axis(side =1,at=range(bp[u[i]==annotation]),labels = c("",""),
         las=2,lwd=2)
    axis(side =1,at=mean(range(bp[u[i]==annotation])),labels = as.character(u[i]),
         las=1,lwd=0)}}
VBDirMult <-function(Y,L=2,alpha=rep(1,ncol(Y)),beta=rep(1,L),maxit=1000,seed=1){
  set.seed(1)
  N <- nrow(Y)
  K <- ncol(Y)
  EelW <-matrix(rgamma(N,beta),N,L,byrow =TRUE)
  EelW <- EelW/rowSums(EelW)
  EelH <-matrix(rgamma(L,alpha),L,K,byrow =TRUE)
  EelH <- EelH/rowSums(EelH)for(iter in1:maxit){
    Sw <- EelW *(((Y)/(EelW %*% EelH))%*% t(EelH))
    Sh <- EelH *(t(EelW)%*%(Y/(EelW %*% EelH)))
    beta_W <- beta + Sw
    alpha_H <- alpha + Sh
    EelW <-exp(sweep(digamma(beta_W),1,digamma(rowSums(beta_W))))
    EelH <-exp(sweep(digamma(alpha_H),1,digamma(rowSums(alpha_H))))}
  EW <- sweep(beta_W,1,rowSums(beta_W),"/")
  EH <- sweep(alpha_H,1,rowSums(alpha_H),"/")
  ELBO <- sum(lgamma(rowSums(Y)+1))- sum(lgamma(Y+1))+
    sum(-Y*(((EelW*log(EelW))%*%EelH + EelW%*%(EelH*log(EelH)))/(EelW%*%EelH)-log(EelW%*%EelH)))+
    L*sum(lgamma(sum(alpha))-sum(lgamma(alpha)))+
    sum(-lgamma(rowSums(alpha_H))+rowSums(lgamma(alpha_H)))+
    N*sum(lgamma(sum(beta))-sum(lgamma(beta)))+
    sum(-lgamma(rowSums(beta_W))+rowSums(lgamma(beta_W)))return(list(W=EW,H=EH,ELBO=ELBO))}

makeTopTable <-function(H,n=5){
  toplist <- apply(H,1,function(x){
    top = sort(x,decreasing =TRUE)[1:n]data.frame(name=factor(names(top),levels = names(top)),
               value=unname(top))})  
  class(toplist)<-"minibarTable"return(toplist)}

Zeller <- curatedMetagenomicData("ZellerG_2014.metaphlan_bugs_list.stool",
                       counts=TRUE, dryrun=FALSE)
Zeller.counts = exprs(Zeller$ZellerG_2014.metaphlan_bugs_list.stool)
splitnames <- strsplit(rownames(Zeller.counts),"\\|")
len <- sapply(splitnames, length)
Zeller.counts.g <- Zeller.counts[len==6,]
rownames(Zeller.counts.g)<- sapply(splitnames[len==6],function(x)x[6])

disease <- factor(Zeller$ZellerG_2014.metaphlan_bugs_list.stool$disease,levels=c("CRC","adenoma","healthy"))
ord <- order(disease)
disease <- disease[ord]
out <- VBDirMult(t(Zeller.counts.g)[ord,],L=5)

top5list <- makeTopTable(out$H)
mat <-matrix(1:5,ncol=1)
plot(top5list,mat,leftmargin =16,col="firebrick")

W <- t(unname(out$W))
ag <- apply(W,1,function(x)tapply(x, disease, median))
colnames(ag)<-1:5
mosaicplot(ag,cex.axis =0.9,main="",color ="cornflowerblue")

annotationBarplot(W,annotation = disease,cols = RColorBrewer::brewer.pal(5,"Greens"))

Viewing all articles
Browse latest Browse all 123

Trending Articles