NAGuide R methods

NAGuide R methods#

Setup basic methods and packages used for all methods

  • BiocManager could be moved to the methods that are installed from Bioconductor

Hide code cell source

# options("install.lock"=FALSE)

# Packages that are installed (if needed) and attached up front,
# before any imputation method is run.
packages_base_R <- c(
  "BiocManager",
  "reshape2",
  "data.table",
  "readr",
  "tibble"
)

# Attach `pkg` (a package name given as a string); if it cannot be attached,
# install it with install.packages() and attach it afterwards.
install_rpackage <- function(pkg) {
  is_attached <- require(pkg, character.only = TRUE)
  if (is_attached) {
    # nothing to install
    return(invisible(NULL))
  }
  install.packages(pkg)
  library(pkg, character.only = TRUE)
}

# Bioconductor counterpart of the CRAN installer: attach `pkg` (package name
# as a string) and fall back to BiocManager::install() when it is missing.
# Used by the large imputation function.
install_bioconductor <- function(pkg) {
  is_attached <- require(pkg, character.only = TRUE)
  if (is_attached) {
    # nothing to install
    return(invisible(NULL))
  }
  BiocManager::install(pkg)
  library(pkg, character.only = TRUE)
}


# Attach every base package, installing the ones that are missing
for (package in packages_base_R) install_rpackage(package)
Loading required package: BiocManager
Loading required package: reshape2
Loading required package: data.table

Attaching package: ‘data.table’
The following objects are masked from ‘package:reshape2’:

    dcast, melt
Loading required package: readr
Loading required package: tibble

Setup can be tricky; the aim is to integrate as much as possible into the conda environment.

Copied from NAGuideR’s github RShiny application. Adapted to run as standalone function in context of the Snakemake workflow.

  • df and df1 ?

  • seems quite hacky

  • code is only slightly adapted from repo to run here, mainly to install packages on the fly

Hide code cell source

# Impute the missing values of a table with one of the NAGuideR methods.
#
# Arguments:
#   x      - object that can be coerced with as.data.frame(). Several branches
#            transpose it so that samples end up in rows, i.e. the input is
#            expected with features in rows and samples in columns.
#   method - name of the imputation method (case-insensitive), e.g. "zero",
#            "minimum", "knn_impute", "mle", "pi", "gsimp", ...
#
# Returns:
#   A data.frame with the imputed values.
#
# Notes:
#   - Required packages are installed and attached on demand through
#     install_rpackage() / install_bioconductor(); some methods source helper
#     files or install tarballs from 'src/R_NAGuideR/'.
#   - Package functions are called with an explicit namespace where possible
#     so that they cannot be masked by other attached packages.
#   - Stops with an error for an unknown method name.
nafunctions <- function(x, method = "zero") {
  df <- df1 <- as.data.frame(x)
  method <- tolower(method)
  if (method == "zero") {
    df[is.na(df)] <- 0
  } else if (method == "minimum") {
    # one global minimum over all observed values
    df[is.na(df)] <- min(df1, na.rm = TRUE)
  } else if (method == "colmedian") {
    install_rpackage("e1071")
    df <- e1071::impute(df1, what = "median")
  } else if (method == "rowmedian") {
    install_rpackage("e1071")
    dfx <- e1071::impute(t(df1), what = "median")
    df <- t(dfx)
  } else if (method == "knn_impute") {
    install_bioconductor("impute")
    data_zero1 <-
      impute::impute.knn(as.matrix(df1),
                         k = 10,
                         rowmax = 1,
                         colmax = 1) # rowmax = 0.9, colmax = 0.9
    df <- data_zero1$data
  } else if (method == "seqknn") {
    # SeqKnn is shipped as a source tarball with the workflow
    if (!require(SeqKnn)) {
      install.packages("src/R_NAGuideR/SeqKnn_1.0.1.tar.gz",
                       repos = NULL,
                       type = "source")
      library(SeqKnn)
    }
    df <- SeqKNN(df1, k = 10)
  } else if (method == "bpca") {
    install_bioconductor("pcaMethods")
    data_zero1 <-
      pcaMethods::pca(
        as.matrix(df1),
        nPcs = ncol(df1) - 1,
        method = "bpca",
        maxSteps = 100
      )
    df <- pcaMethods::completeObs(data_zero1)
  } else if (method == "svdmethod") {
    install_bioconductor("pcaMethods")
    data_zero1 <-
      pcaMethods::pca(as.matrix(df1),
                      nPcs = ncol(df1) - 1,
                      method = "svdImpute")
    df <- pcaMethods::completeObs(data_zero1)
  } else if (method == "lls") {
    install_bioconductor("pcaMethods")
    data_zero1 <- pcaMethods::llsImpute(t(df1), k = 10)
    df <- t(pcaMethods::completeObs(data_zero1))
  } else if (method == "mle") {
    install_rpackage("norm")
    # NOTE(review): on load the norm package warns that it does not work
    # reliably when the number of variables exceeds 30 -- check the imputed
    # values for plausibility on wide inputs.
    xxm <- as.matrix(df1)
    ss <- norm::prelim.norm(xxm)
    thx <- norm::em.norm(ss)
    norm::rngseed(123) # imp.norm requires a seeded generator
    df <- norm::imp.norm(ss, thx, xxm)
  } else if (method == "qrilc") {
    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage("gmm")
    install_rpackage("imputeLCMD")
    xxm <- t(df1)
    data_zero1 <-
      imputeLCMD::impute.QRILC(xxm, tune.sigma = 1)[[1]]
    df <- t(data_zero1)
  } else if (method == "mindet") {
    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage("gmm")
    install_rpackage("imputeLCMD")
    xxm <- as.matrix(df1)
    df <- imputeLCMD::impute.MinDet(xxm, q = 0.01)
  } else if (method == "minprob") {
    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage("gmm")
    install_rpackage("imputeLCMD")
    xxm <- as.matrix(df1)
    df <-
      imputeLCMD::impute.MinProb(xxm, q = 0.01, tune.sigma = 1)
  } else if (method == "irm") {
    install_rpackage("VIM")
    df <- VIM::irmi(df1, trace = TRUE, imp_var = FALSE)
    rownames(df) <- rownames(df1)
  } else if (method == "impseq") {
    install_rpackage("rrcovNA")
    df <- rrcovNA::impSeq(df1)
  } else if (method == "impseqrob") {
    install_rpackage("rrcovNA")
    data_zero1 <- rrcovNA::impSeqRob(df1, alpha = 0.9)
    df <- data_zero1$x
  } else if (method %in% c("mice-norm", "mice-cart")) {
    install_rpackage("mice")
    # "mice-norm" -> "norm", "mice-cart" -> "cart"
    mice_method <- sub("^mice-", "", method)
    minum <- 5
    datareadmi <- mice::mice(df1,
                             m = minum,
                             seed = 1234,
                             method = mice_method)
    # average the `minum` completed data sets
    newdatareadmi <- 0
    for (i in seq_len(minum)) {
      newdatareadmi <-
        mice::complete(datareadmi, action = i) + newdatareadmi
    }
    df <- newdatareadmi / minum
    rownames(df) <- rownames(df1)
  } else if (method == "trknn") {
    # imputeKNN() is defined in the sourced file
    source('src/R_NAGuideR/Imput_funcs.r')
    # sim_trKNN_wrapper <- function(data) {
    #   result <- data %>% as.matrix %>% t %>% imputeKNN(., k=10, distance='truncation', perc=0) %>% t
    #   return(result)
    # }
    # df1x <- sim_trKNN_wrapper(t(df1))
    # df<-as.data.frame(t(df1x))
    df <-
      imputeKNN(as.matrix(df),
                k = 10,
                distance = 'truncation',
                perc = 0)
    df <- as.data.frame(df)
  } else if (method == "rf") {
    install_rpackage("missForest")
    data_zero1 <- missForest::missForest(
      t(df1),
      maxiter = 10,
      ntree = 20, # input$rfntrees
      mtry = floor(nrow(df1) ^ (1 / 3)),
      verbose = TRUE
    )
    df <- t(data_zero1$ximp)
  } else if (method == "pi") {
    width <- 0.3 # input$piwidth
    downshift <- 1.8 # input$pidownshift
    # Per column: draw replacements from a normal distribution that is
    # shifted down by `downshift` SDs and narrowed to `width` SDs of the
    # observed values. seq_len() keeps zero-column input from failing.
    for (i in seq_len(ncol(df1))) {
      temp <- df1[[i]]
      is_missing <- is.na(temp)
      n.missing <- sum(is_missing)
      if (n.missing > 0) {
        observed <- temp[!is_missing]
        observed.sd <- sd(observed, na.rm = TRUE)
        temp.sd <- width * observed.sd
        temp.mean <- mean(observed, na.rm = TRUE) - downshift * observed.sd
        temp[is_missing] <-
          rnorm(n.missing, mean = temp.mean, sd = temp.sd)
        df[[i]] <- temp
      }
    }
    # Disabled method kept from the original source:
    # else if(method=="grr"){
    #   library(DreamAI)
    #   df<-impute.RegImpute(data=as.matrix(df1), fillmethod = "row_mean", maxiter_RegImpute = 10,conv_nrmse = 1e-03)
    # }
  } else if (method == "gms") {
    # install.packages('GMSimpute')
    # GMSimpute is shipped as a source tarball with the workflow
    if (!require(GMSimpute)) {
      install.packages(
        "src/R_NAGuideR/GMSimpute_0.0.1.1.tar.gz",
        repos = NULL,
        type = "source"
      )

      library(GMSimpute)
    }

    df <- GMS.Lasso(df1,
                    nfolds = 3,
                    log.scale = FALSE,
                    TS.Lasso = TRUE)
  } else if (method == "msimpute") {
    install_bioconductor("msImpute")
    df <- msImpute::msImpute(as.matrix(df),
                             method = 'v2')
    df <- as.data.frame(df)
  } else if (method == "msimpute_mnar") {
    install_bioconductor("msImpute")
    # all samples are assigned to a single group
    df <-
      msImpute::msImpute(as.matrix(df),
                         method = 'v2-mnar',
                         group = rep(1, ncol(df)))
    df <- as.data.frame(df)
  } else if (method == "gsimp") {
    # Set the option for the sourced code, but restore the caller's value
    # when this function exits instead of leaking it globally.
    old_options <- options(stringsAsFactors = FALSE)
    on.exit(options(old_options), add = TRUE)
    # dependencies partly for sourced file

    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage("gmm")
    install_rpackage("imputeLCMD")
    install_rpackage("magrittr")
    install_rpackage("glmnet")
    install_rpackage("abind")
    install_rpackage("foreach")
    install_rpackage("doParallel")
    source('src/R_NAGuideR/GSimp.R')

    # wrapper function with data pre-processing
    pre_processing_GS_wrapper <- function(data_raw_log) {
      # samples in rows, features in columns #
      # Initialization #
      data_raw_log_qrilc <- as.data.frame(data_raw_log) %>%
        impute.QRILC() %>% extract2(1)
      # Centralization and scaling #
      data_raw_log_qrilc_sc <-
        scale_recover(data_raw_log_qrilc, method = 'scale')
      # Data after centralization and scaling #
      data_raw_log_qrilc_sc_df <- data_raw_log_qrilc_sc[[1]]
      # Parameters for centralization and scaling (for scaling recovery) #
      data_raw_log_qrilc_sc_df_param <- data_raw_log_qrilc_sc[[2]]
      # NA position #
      NA_pos <- which(is.na(data_raw_log), arr.ind = TRUE)
      # NA introduced to log-scaled-initialized data #
      data_raw_log_sc <- data_raw_log_qrilc_sc_df
      data_raw_log_sc[NA_pos] <- NA
      # Feed initialized and missing data into GSimp imputation #
      result <-
        data_raw_log_sc %>% GS_impute(
          .,
          iters_each = 50,
          iters_all = 10,
          initial = data_raw_log_qrilc_sc_df,
          lo = -Inf,
          hi = 'min',
          n_cores = 1,
          imp_model = 'glmnet_pred'
        )
      data_imp_log_sc <- result$data_imp
      # Data recovery #
      data_imp <- data_imp_log_sc %>%
        scale_recover(., method = 'recover',
                      param_df = data_raw_log_qrilc_sc_df_param) %>%
        extract2(1)
      return(data_imp)
    }
    df <- t(df) # samples in rows, feature in columns
    df <- pre_processing_GS_wrapper(df)
    df <- t(df) # features in rows, samples in columns
  } else {
    stop(paste("Unsupported method so far:", method))
  }
  # several branches produce a matrix: always hand back a data.frame
  as.data.frame(df)
}

Parameters#

Choose one of the available methods. Some methods might fail for your dataset for unknown reasons (and the error won’t always be easy to understand)

method = 'ZERO'
method = 'MINIMUM'
method = 'COLMEDIAN'
method = 'ROWMEDIAN'
method = 'KNN_IMPUTE'
method = 'SEQKNN'
method = 'BPCA'
method = 'SVDMETHOD'
method = 'LLS'
method = 'MLE'
method = 'LLS'
method = 'QRILC'
method = 'MINDET'
method = 'MINPROB'
method = 'IRM'
method = 'IMPSEQ'
method = 'IMPSEQROB'
method = 'MICE-NORM'
method = 'MICE-CART'
method = 'RF'
method = 'PI'
method = 'GMS'
method = 'TRKNN'
method = 'MSIMPUTE'
method = 'MSIMPUTE_MNAR'
method = 'GSIMP'
# Default parameters (example run, used for testing)
train_split <- "runs/example/data/data_wide_sample_cols.csv"
folder_experiment <- "runs/example/"
method <- "KNN_IMPUTE"
# Parameters: these later assignments override the defaults above
train_split <- "runs/alzheimer_study/data/data_wide_sample_cols.csv"
method <- "MLE"
folder_experiment <- "runs/alzheimer_study"

Dump predictions#

# Load the training split; the first CSV column is used as row names.
df <- utils::read.csv(
  file = train_split,
  header = TRUE,
  row.names = 1,
  stringsAsFactors = FALSE
)
df
A data.frame: 1421 × 210
Sample_000Sample_001Sample_002Sample_003Sample_004Sample_005Sample_006Sample_007Sample_008Sample_009Sample_200Sample_201Sample_202Sample_203Sample_204Sample_205Sample_206Sample_207Sample_208Sample_209
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
A0A024QZX5;A0A087X1N8;P3523715.91237 NA16.1114916.1069715.6032115.8116115.4996615.2207515.98013 NA NA15.8626415.6559415.40126 NA15.6822515.7980415.7394915.47682 NA
A0A024R0T9;K7ER74;P0265516.8519416.87369 NA17.0315115.3305118.6140717.4091717.6839116.3862516.5897217.3095516.6148017.9533918.1993817.2791116.8863517.55400 NA16.7794817.26137
A0A024R3W6;A0A024R412;O60462;O60462-2;O60462-3;O60462-4;O60462-5;Q7LBX6;X5D2Q815.5704715.5189215.9353215.8018715.3752215.6242015.9118515.3849915.8944715.37538 NA15.9317914.8587115.12451 NA14.9100615.5996615.4689614.9949615.17487
A0A024R644;A0A0A0MRU5;A0A1B0GWI2;O7550316.4810816.3869416.4157716.9786216.6790615.9576416.2338916.4179316.2713416.2097015.9540216.0295916.4975316.5128416.5126916.4823215.9378216.8977316.1324616.23505
A0A075B6H717.30138 NA18.1748015.96322 NA18.31720 NA17.2139317.79403 NA NA16.4826214.8346713.5689314.48257 NA NA NA NA NA
A0A075B6H920.2464619.9411019.2509019.6279720.4498318.6856920.3472220.7960418.9952319.24667 NA NA18.2698619.8006220.1831517.7048518.1545018.6363114.9075417.89344
A0A075B6I016.7644618.7862416.8321717.8520918.6818517.8654318.2499520.3093617.9784319.4104718.0055018.5414217.8074819.8986519.6742017.0394518.1517917.95040 NA17.74443
A0A075B6I117.5835717.1438815.6707318.8774917.0813114.8236918.5196919.3564018.5644917.9631016.4140217.5051014.0191117.5057420.25111 NA16.5026716.32073 NA16.37054
A0A075B6I616.98778 NA17.0118314.1815314.1402117.0417917.3756016.9979417.2031116.7583915.7254317.0675816.4584916.3277816.3337216.4130416.8600316.4007116.1186815.77958
A0A075B6I920.0541619.0673318.5690618.9850619.6856419.3758019.3988520.5435918.8439718.8997521.8526320.1465019.5345919.8671320.7662619.1021118.5379618.8489718.3677618.80581
A0A075B6J9 NA16.18799 NA13.4381314.4948415.9087516.18436 NA16.1701916.32362 NA NA16.6319317.4357419.7781116.06423 NA17.58044 NA16.53239
A0A075B6K416.1485016.1272715.3867216.5650216.4177116.45498 NA17.0467716.0321016.1490818.5816218.5173716.4835416.0633817.1912315.3503316.58173 NA NA16.33787
A0A075B6K517.3426117.4172217.2363316.2671717.3902018.3360417.2176518.3248817.5245818.7760819.5156618.9890617.8061416.7481918.4125417.1538417.9018417.5189917.5022016.98874
A0A075B6P5;P01615 NA NA NA16.9896517.4933018.2692317.7898918.1533417.7870318.3521319.92434 NA NA19.8413619.2662017.1752819.5750619.3058818.9765118.68810
A0A075B6Q5 NA NA15.12836 NA NA NA NA16.73949 NA NA NA17.0259816.1028715.87972 NA NA NA16.88355 NA15.68019
A0A075B6R2 NA NA16.2798116.7774417.4974417.0539616.9177019.7722216.7370517.9567017.9982218.1213119.19184 NA17.2858916.5977717.0704416.06459 NA16.03648
A0A075B6R9;A0A0C4DH6819.5164019.37284 NA19.0092319.5414719.5671920.1850720.5645419.5232419.3871620.1274920.3736019.4998318.9773920.5883218.0733718.8467319.6205419.3392620.39256
A0A075B6S215.5112416.2082414.8457015.23242 NA16.2865314.65632 NA14.5121216.5692515.0742315.9130416.6596315.8011717.4537814.1015316.6121316.2198415.6958216.38320
A0A075B6S5 NA NA NA NA NA15.9601616.1527717.0784216.8465916.3338515.5131516.14929 NA15.3513315.9116115.6517016.03424 NA NA NA
A0A075B6S916.5576517.5159717.9399916.9514717.0879517.3609916.4210319.7908017.1339915.4731717.2595820.5021819.2650718.4246818.1972516.1456319.5037318.4250117.7519016.21995
A0A075B7B814.91594 NA14.65865 NA15.3488614.5011115.1219614.5114215.31218 NA NA NA NA NA NA NA NA NA NA NA
A0A075B7D018.35754 NA18.2301819.2610319.7863719.6555118.5159320.5000819.5057220.00935 NA19.23066 NA18.0771119.4352819.0449817.0216318.0826218.7376717.31493
A0A087WSV8;V9HW7515.6758714.50492 NA15.5530115.52087 NA15.21659 NA13.85270 NA NA15.5257615.2093415.5381413.9836015.1491715.89044 NA15.4187214.79854
A0A087WSY4 NA17.1499315.65052 NA NA NA NA NA NA NA15.85031 NA16.2078115.33813 NA15.12749 NA NA NA20.34338
A0A087WSY5;Q96IY417.8632318.0413017.7766818.3417818.4288817.8513117.6895319.1298418.0186818.2319617.8611717.1273117.2878418.3573418.0921317.1149617.1954117.9442618.0847817.34736
A0A087WSY620.1168620.2745019.2808019.7819019.7265820.7958019.6323920.5975919.9661018.7243921.2815519.5135919.2447419.7366319.37543 NA19.7985518.6509218.4342219.15982
A0A087WSZ0;A0A0G2JQJ016.68463 NA NA NA NA13.03775 NA15.2586915.5957813.63975 NA15.9783514.31120 NA17.3274714.8897215.6365813.7425916.4765213.66795
A0A087WTA1;A0A0A0MR20;E9PDN6;F5H107;Q9C0A0;Q9C0A0-217.5937718.0124017.8292118.2685017.5570617.3805117.3108216.4085917.3750618.0290816.9424917.74655 NA17.69916 NA18.0800017.4805717.8153518.1862417.91903
A0A087WTA8;P0812317.6190818.2168517.5444517.5022517.8028816.9918417.9773917.7099417.5321317.3504517.5621916.5727916.90066 NA NA16.9340216.74856 NA17.1775717.26857
A0A087WTK0;A0A087WVC6;Q1291315.5012115.3069615.8147115.5515615.3213116.0441516.18863 NA15.5648615.37830 NA15.7602915.21911 NA NA NA15.3939615.24665 NA NA
Q9UN70;Q9UN70-216.1573415.7245516.4318216.2305215.95639 NA16.4384315.6314615.7212615.74455 NA15.6839115.4991315.9858515.0297915.4857115.5279816.0801215.6316515.77433
Q9UNN817.5605617.3015917.76464 NA17.1159118.1904217.9768217.0015917.09973 NA16.77195 NA17.7796717.5903017.3782116.4571316.4507517.8933217.6361817.39555
Q9UNW116.9599716.5129016.14734 NA16.5081815.6471416.4181416.4868116.3143817.10669 NA16.21768 NA16.06726 NA16.3173416.2100416.5703716.1786816.53072
Q9UP7915.15310 NA16.0362115.4160915.5027716.3383315.8980214.59940 NA15.94205 NA NA15.08015 NA15.2610415.73977 NA NA16.10664 NA
Q9UPU318.3254018.23025 NA18.1004018.1773818.4628817.9007617.7512118.7432117.7462416.7844818.4704719.0300518.6485618.1108918.4219618.3693717.6405917.2421018.27864
Q9UQ5215.3490115.5498915.71187 NA15.6280915.5390215.4003114.5504515.0872415.2123315.8143715.6499415.60246 NA15.0466615.7163716.1233015.6857115.8348516.18904
Q9UQM716.7798316.6355716.9280017.3646817.2613916.5729216.34126 NA17.18529 NA15.4493817.1391517.4659815.9544616.0165117.9082516.2232617.0897316.3884517.38407
Q9Y24016.2570016.2715416.5778017.13331 NA16.7610916.9081016.8798015.2639316.6680716.7483116.5384216.9056716.7234616.6139616.7144216.2557216.5080217.28611 NA
Q9Y279;Q9Y279-217.1679417.4455617.3634517.2928917.2204716.9875417.17998 NA17.1630416.5564115.0289316.5458916.5845016.5875517.6596316.6165616.70633 NA NA16.95138
Q9Y281;Q9Y281-3 NA NA NA NA NA NA NA NA NA NA NA12.30069 NA NA NA13.0103313.07666 NA NA12.54090
Q9Y28717.6628917.58819 NA17.6961417.7354117.0309416.9508318.26680 NA16.89611 NA NA16.5786617.9707417.1072818.2760816.4771616.5435416.1737817.82544
Q9Y2I2;Q9Y2I2-1;Q9Y2I2-2;Q9Y2I2-4;Q9Y2I2-5;Q9Y2I2-616.9277617.2207417.4259517.2986117.8281616.7418116.7206915.9049916.3961117.3693516.2202616.7063517.8930417.7065917.09718 NA16.9573018.3758717.5733317.93369
Q9Y2T3;Q9Y2T3-318.4676717.7211318.4401218.8656518.7322618.63877 NA17.1155719.3320318.0933816.5274618.1635417.6509217.5202617.4169318.6247718.2610718.5141918.0065017.90217
Q9Y490 NA13.96193 NA NA13.6269013.3812113.74195 NA NA13.37095 NA NA NA NA NA12.9323313.29442 NA13.8408613.52534
Q9Y4L118.5979518.47622 NA18.5596418.3046518.5237918.3602117.8747118.6725818.4313117.1867318.4140618.2671018.0855717.5979218.2899117.9772018.1487517.8805518.12515
Q9Y5F6;Q9Y5F6-216.4692915.78240 NA16.5289916.2848016.8634415.8995316.1095217.4013216.3436715.4592616.4288416.1595015.6315614.7356915.9682116.8848115.8777215.5542416.57524
Q9Y5I4;Q9Y5I4-217.1870117.4471717.4104317.5451417.2966717.3972817.1453216.4048217.3230817.4575315.8973117.13489 NA17.0858815.8738617.1035417.1090216.9384017.1550816.77629
Q9Y5Y718.8395719.1949519.0882018.7149718.6682419.0513319.0871118.8024518.7143918.7874517.1345217.8372718.7856218.6136718.7374718.7263918.4601119.50151 NA NA
Q9Y61716.8589716.7992716.2878217.07490 NA16.4995216.7026415.9239816.6377516.4427714.5826314.9398815.8697015.7117715.4650715.8083115.0352916.2830615.9196015.71292
Q9Y64619.3219619.1902919.7019419.7601319.6236919.2862819.6221919.0164419.1501219.5165219.8615620.2448419.2073120.3028119.4350719.8938820.0145720.3061420.2034220.04164
Q9Y653;Q9Y653-2;Q9Y653-316.0124315.5279415.2287115.4948414.7566815.0856315.78890 NA15.63408 NA NA14.9520115.5336115.0601415.66794 NA15.4222815.8079615.1574015.23671
Q9Y69615.1777815.5755314.7277014.59034 NA14.8599516.1363214.9492815.2656814.63326 NA15.8447915.8078416.1657415.9144815.6838716.1063316.0978016.7117615.65174
Q9Y6C2 NA NA13.7565814.68157 NA NA NA14.31805 NA NA NA NA NA NA14.2040714.23561 NA14.40272 NA15.21141
Q9Y6N615.0502114.8329015.1182815.1401815.2558814.69813 NA NA15.29271 NA14.7428215.1248215.2456815.2850215.0246115.4150415.3453615.7153214.6398314.20491
Q9Y6N7;Q9Y6N7-2;Q9Y6N7-416.8422116.5973917.4395217.3560817.0749517.0880316.71735 NA17.4531916.91617 NA17.0342516.7477916.39020 NA17.5509217.08447 NA16.5327616.74913
Q9Y6R7 NA20.2993319.5978519.4288019.5819819.1303318.6903018.9960519.9931818.5503419.2330418.8163618.9021218.81282 NA17.9218518.7081418.7247819.4111319.27470
Q9Y6X5 NA15.5555815.73522 NA15.32828 NA NA NA NA NA15.8946915.5618916.0887715.6691115.0119316.34016 NA16.1382715.8069015.73176
Q9Y6Y8;Q9Y6Y8-219.5628219.3857520.4473220.21596 NA19.6331920.0568618.6804420.0232319.94839 NA19.8094820.2043619.4182317.8474319.9276619.4326019.5991219.5450019.57720
Q9Y6Y9 NA13.9699712.63641 NA13.14453 NA12.8174112.89658 NA13.68470 NA NA12.7073812.9781212.2875012.92941 NA13.63705 NA11.04203
S4R3U612.8052112.4424512.5046612.44461 NA NA NA NA13.00793 NA NA10.13265 NA10.4976910.56335 NA NA11.17371 NA11.79130
  • data.frame does not allow arbitrary column names, but only valid column names…

  • tibbles don’t support rownames, and the imputation methods rely on normal data.frames. Save the header row for later use.

# Read a single data row with readr only to recover the unmodified column
# names of the CSV file; the first name labels the feature column.
original_header <- names(
  readr::read_csv(train_split, col_names = TRUE, skip = 0, n_max = 1)
)
feat_name <- original_header[[1]]
original_header[seq_len(5)]
Rows: 1 Columns: 211
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr   (1): protein groups
dbl (180): Sample_000, Sample_002, Sample_003, Sample_004, Sample_005, Sampl...
lgl  (30): Sample_001, Sample_009, Sample_012, Sample_015, Sample_017, Sampl...

 Use `spec()` to retrieve the full column specification for this data.
 Specify the column types or set `show_col_types = FALSE` to quiet this message.
  1. 'protein groups'
  2. 'Sample_000'
  3. 'Sample_001'
  4. 'Sample_002'
  5. 'Sample_003'

Uncomment to test certain methods (only for debugging, as at least one method per package is tested using Github Actions)

Hide code cell source

# to_test <- c(
# 'ZERO',
# 'MINIMUM',
# 'COLMEDIAN',
# 'ROWMEDIAN',
# 'KNN_IMPUTE',
# 'SEQKNN',
# 'BPCA',
# 'SVDMETHOD',
# 'LLS',
# 'MLE',
# 'LLS',
# 'QRILC',
# 'MINDET',
# 'MINPROB',
# 'IRM',
# 'IMPSEQ',
# 'IMPSEQROB',
# 'MICE-NORM',
# 'MICE-CART',
# 'RF',
# 'PI',
# 'GMS', # fails to install on Windows
# 'TRKNN',
# 'MSIMPUTE',
# 'MSIMPUTE_MNAR',
# 'GSIMP'
# )

# for (method in to_test) {
#     print(method)
#     pred <- nafunctions(df, method)
# }

Impute and save predictions with original feature and column names

# Impute with the selected method
pred <- nafunctions(x = df, method = method)
# Move the row names into a leading column (tibbles carry no row names)
pred <- cbind(rownames(pred), pred)
pred <- tibble::as_tibble(pred)
# Restore the column names exactly as they appear in the CSV header
colnames(pred) <- original_header
pred
Iterations of EM: 
1...2...
Loading required package: norm
This package has some major limitations
(for example, it does not work reliably when
the number of variables exceeds 30),
and has been superseded by the norm2 package.
Warning message in norm::prelim.norm(xxm):
“NAs introduced by coercion to integer range”
A tibble: 1421 × 211
protein groupsSample_000Sample_001Sample_002Sample_003Sample_004Sample_005Sample_006Sample_007Sample_008Sample_200Sample_201Sample_202Sample_203Sample_204Sample_205Sample_206Sample_207Sample_208Sample_209
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
A0A024QZX5;A0A087X1N8;P35237 15.9123715.49990 16.11149 16.10697 15.60321 15.81161 15.49966 15.22075 15.9801321.79484 15.86264 15.65594 15.4012616.35012 15.68225 15.79804 15.73949 15.4768216.77878
A0A024R0T9;K7ER74;P02655 16.8519416.873692448.50299 17.03151 15.33051 18.61407 17.40917 17.68391 16.3862517.30955 16.61480 17.95339 18.1993817.27911 16.88635 17.554002466.87026 16.7794817.26137
A0A024R3W6;A0A024R412;O60462;O60462-2;O60462-3;O60462-4;O60462-5;Q7LBX6;X5D2Q8 15.5704715.51892 15.93532 15.80187 15.37522 15.62420 15.91185 15.38499 15.8944715.72827 15.93179 14.85871 15.1245113.36253 14.91006 15.59966 15.46896 14.9949615.17487
A0A024R644;A0A0A0MRU5;A0A1B0GWI2;O75503 16.4810816.38694 16.41577 16.97862 16.67906 15.95764 16.23389 16.41793 16.2713415.95402 16.02959 16.49753 16.5128416.51269 16.48232 15.93782 16.89773 16.1324616.23505
A0A075B6H7 17.3013818.50171 18.17480 15.963222442.09704 18.317202508.99869 17.21393 17.7940322.64528 16.48262 14.83467 13.5689314.482572434.521792454.508322466.870262468.9193212.77627
A0A075B6H9 20.2464619.94110 19.25090 19.62797 20.44983 18.68569 20.34722 20.79604 18.9952326.529552545.15500 18.26986 19.8006220.18315 17.70485 18.15450 18.63631 14.9075417.89344
A0A075B6I0 16.7644618.78624 16.83217 17.85209 18.68185 17.86543 18.24995 20.30936 17.9784318.00550 18.54142 17.80748 19.8986519.67420 17.03945 18.15179 17.950402468.9193217.74443
A0A075B6I1 17.5835717.14388 15.67073 18.87749 17.08131 14.82369 18.51969 19.35640 18.5644916.41402 17.50510 14.01911 17.5057420.251112434.52179 16.50267 16.320732468.9193216.37054
A0A075B6I6 16.9877816.98320 17.01183 14.18153 14.14021 17.04179 17.37560 16.99794 17.2031115.72543 17.06758 16.45849 16.3277816.33372 16.41304 16.86003 16.40071 16.1186815.77958
A0A075B6I9 20.0541619.06733 18.56906 18.98506 19.68564 19.37580 19.39885 20.54359 18.8439721.85263 20.14650 19.53459 19.8671320.76626 19.10211 18.53796 18.84897 18.3677618.80581
A0A075B6J9 2505.2263516.187992448.50299 13.43813 14.49484 15.90875 16.184362714.83693 16.1701917.576732545.15500 16.63193 17.4357419.77811 16.064232454.50832 17.580442468.9193216.53239
A0A075B6K4 16.1485016.12727 15.38672 16.56502 16.41771 16.454982508.99869 17.04677 16.0321018.58162 18.51737 16.48354 16.0633817.19123 15.35033 16.581732466.870262468.9193216.33787
A0A075B6K5 17.3426117.41722 17.23633 16.26717 17.39020 18.33604 17.21765 18.32488 17.5245819.51566 18.98906 17.80614 16.7481918.41254 17.15384 17.90184 17.51899 17.5022016.98874
A0A075B6P5;P01615 2505.2263515.887182448.50299 16.98965 17.49330 18.26923 17.78989 18.15334 17.7870319.924342545.155002480.85687 19.8413619.26620 17.17528 19.57506 19.30588 18.9765118.68810
A0A075B6Q5 2505.2263518.98019 15.128362468.890772442.097042495.817822508.99869 16.739492555.0813017.17138 17.02598 16.10287 15.8797214.217272434.521792454.50832 16.883552468.9193215.68019
A0A075B6R2 2505.2263517.24170 16.27981 16.77744 17.49744 17.05396 16.91770 19.77222 16.7370517.99822 18.12131 19.191842511.8050717.28589 16.59777 17.07044 16.064592468.9193216.03648
A0A075B6R9;A0A0C4DH68 19.5164019.372842448.50299 19.00923 19.54147 19.56719 20.18507 20.56454 19.5232420.12749 20.37360 19.49983 18.9773920.58832 18.07337 18.84673 19.62054 19.3392620.39256
A0A075B6S2 15.5112416.20824 14.84570 15.232422442.09704 16.28653 14.656322714.83693 14.5121215.07423 15.91304 16.65963 15.8011717.45378 14.10153 16.61213 16.21984 15.6958216.38320
A0A075B6S5 2505.2263517.126602448.502992468.890772442.09704 15.96016 16.15277 17.07842 16.8465915.51315 16.149292480.85687 15.3513315.91161 15.65170 16.034242466.870262468.9193214.57619
A0A075B6S9 16.5576517.51597 17.93999 16.95147 17.08795 17.36099 16.42103 19.79080 17.1339917.25958 20.50218 19.26507 18.4246818.19725 16.14563 19.50373 18.42501 17.7519016.21995
A0A075B7B8 14.9159413.01654 14.658652468.89077 15.34886 14.50111 15.12196 14.51142 15.3121814.589592545.155002480.856872511.8050716.362402434.521792454.508322466.870262468.9193217.44275
A0A075B7D0 18.3575417.14157 18.23018 19.26103 19.78637 19.65551 18.51593 20.50008 19.5057214.25291 19.230662480.85687 18.0771119.43528 19.04498 17.02163 18.08262 18.7376717.31493
A0A087WSV8;V9HW75 15.6758714.504922448.50299 15.55301 15.520872495.81782 15.216592714.83693 13.8527016.49812 15.52576 15.20934 15.5381413.98360 15.14917 15.890442466.87026 15.4187214.79854
A0A087WSY4 2505.2263517.14993 15.650522468.890772442.097042495.817822508.998692714.836932555.0813015.850312545.15500 16.20781 15.3381318.35346 15.127492454.508322466.870262468.9193220.34338
A0A087WSY5;Q96IY4 17.8632318.04130 17.77668 18.34178 18.42888 17.85131 17.68953 19.12984 18.0186817.86117 17.12731 17.28784 18.3573418.09213 17.11496 17.19541 17.94426 18.0847817.34736
A0A087WSY6 20.1168620.27450 19.28080 19.78190 19.72658 20.79580 19.63239 20.59759 19.9661021.28155 19.51359 19.24474 19.7366319.375432434.52179 19.79855 18.65092 18.4342219.15982
A0A087WSZ0;A0A0G2JQJ0 16.6846318.604932448.502992468.890772442.09704 13.037752508.99869 15.25869 15.5957816.06485 15.97835 14.311202511.8050717.32747 14.88972 15.63658 13.74259 16.4765213.66795
A0A087WTA1;A0A0A0MR20;E9PDN6;F5H107;Q9C0A0;Q9C0A0-2 17.5937718.01240 17.82921 18.26850 17.55706 17.38051 17.31082 16.40859 17.3750616.94249 17.746552480.85687 17.6991616.74664 18.08000 17.48057 17.81535 18.1862417.91903
A0A087WTA8;P08123 17.6190818.21685 17.54445 17.50225 17.80288 16.99184 17.97739 17.70994 17.5321317.56219 16.57279 16.900662511.8050716.85046 16.93402 16.748562466.87026 17.1775717.26857
A0A087WTK0;A0A087WVC6;Q12913 15.5012115.30696 15.81471 15.55156 15.32131 16.04415 16.188632714.83693 15.5648617.67999 15.76029 15.219112511.8050717.045002434.52179 15.39396 15.246652468.9193215.21836
Q9UN70;Q9UN70-2 16.1573415.72455 16.43182 16.23052 15.956392495.81782 16.43843 15.63146 15.7212622.44797 15.68391 15.49913 15.9858515.02979 15.48571 15.52798 16.08012 15.6316515.77433
Q9UNN8 17.5605617.30159 17.764642468.89077 17.11591 18.19042 17.97682 17.00159 17.0997316.771952545.15500 17.77967 17.5903017.37821 16.45713 16.45075 17.89332 17.6361817.39555
Q9UNW1 16.9599716.51290 16.147342468.89077 16.50818 15.64714 16.41814 16.48681 16.3143814.77712 16.217682480.85687 16.0672615.66282 16.31734 16.21004 16.57037 16.1786816.53072
Q9UP79 15.1531016.86541 16.03621 15.41609 15.50277 16.33833 15.89802 14.599402555.0813020.463962545.15500 15.080152511.8050715.26104 15.739772454.508322466.87026 16.1066418.12435
Q9UPU3 18.3254018.230252448.50299 18.10040 18.17738 18.46288 17.90076 17.75121 18.7432116.78448 18.47047 19.03005 18.6485618.11089 18.42196 18.36937 17.64059 17.2421018.27864
Q9UQ52 15.3490115.54989 15.711872468.89077 15.62809 15.53902 15.40031 14.55045 15.0872415.81437 15.64994 15.602462511.8050715.04666 15.71637 16.12330 15.68571 15.8348516.18904
Q9UQM7 16.7798316.63557 16.92800 17.36468 17.26139 16.57292 16.341262714.83693 17.1852915.44938 17.13915 17.46598 15.9544616.01651 17.90825 16.22326 17.08973 16.3884517.38407
Q9Y240 16.2570016.27154 16.57780 17.133312442.09704 16.76109 16.90810 16.87980 15.2639316.74831 16.53842 16.90567 16.7234616.61396 16.71442 16.25572 16.50802 17.2861117.32832
Q9Y279;Q9Y279-2 17.1679417.44556 17.36345 17.29289 17.22047 16.98754 17.179982714.83693 17.1630415.02893 16.54589 16.58450 16.5875517.65963 16.61656 16.706332466.870262468.9193216.95138
Q9Y281;Q9Y281-3 2505.2263517.460772448.502992468.890772442.097042495.817822508.998692714.836932555.0813011.47444 12.300692480.856872511.8050718.21637 13.01033 13.076662466.870262468.9193212.54090
Q9Y287 17.6628917.588192448.50299 17.69614 17.73541 17.03094 16.95083 18.266802555.0813016.730682545.15500 16.57866 17.9707417.10728 18.27608 16.47716 16.54354 16.1737817.82544
Q9Y2I2;Q9Y2I2-1;Q9Y2I2-2;Q9Y2I2-4;Q9Y2I2-5;Q9Y2I2-6 16.9277617.22074 17.42595 17.29861 17.82816 16.74181 16.72069 15.90499 16.3961116.22026 16.70635 17.89304 17.7065917.097182434.52179 16.95730 18.37587 17.5733317.93369
Q9Y2T3;Q9Y2T3-3 18.4676717.72113 18.44012 18.86565 18.73226 18.638772508.99869 17.11557 19.3320316.52746 18.16354 17.65092 17.5202617.41693 18.62477 18.26107 18.51419 18.0065017.90217
Q9Y490 2505.2263513.961932448.502992468.89077 13.62690 13.38121 13.741952714.836932555.0813013.041492545.155002480.856872511.8050718.02870 12.93233 13.294422466.87026 13.8408613.52534
Q9Y4L1 18.5979518.476222448.50299 18.55964 18.30465 18.52379 18.36021 17.87471 18.6725817.18673 18.41406 18.26710 18.0855717.59792 18.28991 17.97720 18.14875 17.8805518.12515
Q9Y5F6;Q9Y5F6-2 16.4692915.782402448.50299 16.52899 16.28480 16.86344 15.89953 16.10952 17.4013215.45926 16.42884 16.15950 15.6315614.73569 15.96821 16.88481 15.87772 15.5542416.57524
Q9Y5I4;Q9Y5I4-2 17.1870117.44717 17.41043 17.54514 17.29667 17.39728 17.14532 16.40482 17.3230815.89731 17.134892480.85687 17.0858815.87386 17.10354 17.10902 16.93840 17.1550816.77629
Q9Y5Y7 18.8395719.19495 19.08820 18.71497 18.66824 19.05133 19.08711 18.80245 18.7143917.13452 17.83727 18.78562 18.6136718.73747 18.72639 18.46011 19.501512468.9193218.37891
Q9Y617 16.8589716.79927 16.28782 17.074902442.09704 16.49952 16.70264 15.92398 16.6377514.58263 14.93988 15.86970 15.7117715.46507 15.80831 15.03529 16.28306 15.9196015.71292
Q9Y646 19.3219619.19029 19.70194 19.76013 19.62369 19.28628 19.62219 19.01644 19.1501219.86156 20.24484 19.20731 20.3028119.43507 19.89388 20.01457 20.30614 20.2034220.04164
Q9Y653;Q9Y653-2;Q9Y653-3 16.0124315.52794 15.22871 15.49484 14.75668 15.08563 15.788902714.83693 15.6340817.49762 14.95201 15.53361 15.0601415.667942434.52179 15.42228 15.80796 15.1574015.23671
Q9Y696 15.1777815.57553 14.72770 14.590342442.09704 14.85995 16.13632 14.94928 15.2656819.40806 15.84479 15.80784 16.1657415.91448 15.68387 16.10633 16.09780 16.7117615.65174
Q9Y6C2 2505.2263516.74583 13.75658 14.681572442.097042495.817822508.99869 14.318052555.0813019.228062545.155002480.856872511.8050714.20407 14.235612454.50832 14.402722468.9193215.21141
Q9Y6N6 15.0502114.83290 15.11828 15.14018 15.25588 14.698132508.998692714.83693 15.2927114.74282 15.12482 15.24568 15.2850215.02461 15.41504 15.34536 15.71532 14.6398314.20491
Q9Y6N7;Q9Y6N7-2;Q9Y6N7-4 16.8422116.59739 17.43952 17.35608 17.07495 17.08803 16.717352714.83693 17.4531918.13057 17.03425 16.74779 16.3902016.07370 17.55092 17.084472466.87026 16.5327616.74913
Q9Y6R7 2505.2263520.29933 19.59785 19.42880 19.58198 19.13033 18.69030 18.99605 19.9931819.23304 18.81636 18.90212 18.8128212.42800 17.92185 18.70814 18.72478 19.4111319.27470
Q9Y6X5 2505.2263515.55558 15.735222468.89077 15.328282495.817822508.998692714.836932555.0813015.89469 15.56189 16.08877 15.6691115.01193 16.340162454.50832 16.13827 15.8069015.73176
Q9Y6Y8;Q9Y6Y8-2 19.5628219.38575 20.44732 20.215962442.09704 19.63319 20.05686 18.68044 20.0232319.43061 19.80948 20.20436 19.4182317.84743 19.92766 19.43260 19.59912 19.5450019.57720
Q9Y6Y9 2505.2263513.96997 12.636412468.89077 13.144532495.81782 12.81741 12.896582555.0813017.161092545.15500 12.70738 12.9781212.28750 12.929412454.50832 13.637052468.9193211.04203
S4R3U6 12.8052112.44245 12.50466 12.444612442.097042495.817822508.998692714.83693 13.0079317.70124 10.132652480.85687 10.4976910.563352434.521792454.50832 11.173712468.9193211.79130

Transform predictions to long format

# Reshape the wide prediction table into long format: one row per
# (feature, sample) pair, with the value column named after the method.
pred <- reshape2::melt(pred, id.vars = feat_name)
colnames(pred) <- c(feat_name, "Sample ID", method)
# Keep only the entries that were missing in the original data `df`.
# NOTE(review): relies on the melted NA mask having the same row order as the
# melted predictions -- confirm `df` and `pred` share feature/sample layout.
was_missing <- reshape2::melt(is.na(df))[["value"]]
pred <- pred[was_missing, ]
pred
A data.frame: 71601 × 3
protein groupsSample IDMLE
<chr><fct><dbl>
11A0A075B6J9 Sample_0002505.226
14A0A075B6P5;P01615 Sample_0002505.226
15A0A075B6Q5 Sample_0002505.226
16A0A075B6R2 Sample_0002505.226
19A0A075B6S5 Sample_0002505.226
24A0A087WSY4 Sample_0002505.226
34A0A087WU43;A0A087WX17;A0A087WXI5;P12830;P12830-2 Sample_0002505.226
39A0A087WW87;A0A087X0Q4;P01614 Sample_0002505.226
40A0A087WWA5 Sample_0002505.226
42A0A087WWT2;Q9NPD7 Sample_0002505.226
43A0A087WX80;P24043 Sample_0002505.226
45A0A087WXE9;E9PQ70;Q6UXH9;Q6UXH9-2;Q6UXH9-3 Sample_0002505.226
48A0A087WYK9;Q02985;Q02985-2;Q6NSD3 Sample_0002505.226
52A0A087WZR4 Sample_0002505.226
53A0A087X089;Q16627;Q16627-2 Sample_0002505.226
56A0A087X0M8 Sample_0002505.226
59A0A087X117;A0A0G2JN29;J3KN36;P69849;Q15155;Q5JPE7;Q5JPE7-2 Sample_0002505.226
69A0A0A0MQS9;A0A0A0MTC7;Q16363;Q16363-2 Sample_0002505.226
78A0A0A0MS20;A0A0A0MSZ8;A0A0G2JM38;A0A0G2JM43;A0A0G2JM57;A0A0G2JM84;A0A0G2JMH7;A0A0G2JML1;A0A0G2JNE9;A0A0G2JNL1;A0A0G2JP25;A0A0G2JP84;A0A0G2JPA9;A0A0G2JPC7;A0A0G2JPU4;A0A0G2JPX5;A0A0G2JQ10;A0A0G2JQ20;A8MUE1;C9JST2;Q8NHJ6;Q8NHJ6-2;Q8NHJ6-3Sample_0002505.226
85A0A0A0MT32;P38571;P38571-2 Sample_0002505.226
87A0A0A0MT66 Sample_0002505.226
90A0A0A0MTC8;Q9NQ36;Q9NQ36-2;Q9NQ36-3 Sample_0002505.226
96A0A0A6YY99;O43508-2 Sample_0002505.226
101A0A0B4J1V2 Sample_0002505.226
102A0A0B4J1V6 Sample_0002505.226
105A0A0B4J1Z1;C9JAB2;Q16629;Q16629-2;Q16629-3;Q16629-4 Sample_0002505.226
108A0A0B4J2B5;S4R460 Sample_0002505.226
109A0A0B4J2C3;P13693;Q5W0H4 Sample_0002505.226
111A0A0C4DFP6;Q9NQ79;Q9NQ79-2 Sample_0002505.226
113A0A0C4DG76;Q5JZ08;Q9BQS7;Q9BQS7-2;Q9BQS7-3;Q9BQS7-4 Sample_0002505.226
298242Q96PQ0 Sample_20911.72617
298244Q96RW7;Q96RW7-2 Sample_20921.51925
298246Q96SM3 Sample_20916.05074
298249Q99538 Sample_20915.77972
298252Q99650;Q99650-2 Sample_20918.56947
298261Q9BQT9;Q9BQT9-2 Sample_20916.33259
298266Q9BT88 Sample_20922.67178
298273Q9BX67 Sample_20918.45231
298277Q9BXP8 Sample_20912.11186
298287Q9H3T2;Q9H3T2-3 Sample_20919.93308
298288Q9H3T3;Q9H3T3-3 Sample_20919.87412
298289Q9H492;Q9H492-2 Sample_20919.07592
298297Q9H8L6 Sample_20913.31142
298299Q9HBR0 Sample_20915.08597
298302Q9HC38-2 Sample_20921.23231
298303Q9HC57 Sample_20919.64863
298310Q9NPH3;Q9NPH3-2;Q9NPH3-5Sample_20916.12520
298314Q9NRB3 Sample_20917.35880
298316Q9NS00;Q9NS00-2 Sample_20918.61479
298317Q9NS84 Sample_20918.63378
298328Q9NY97;Q9NY97-2 Sample_20916.54474
298341Q9P1W8;Q9P1W8-2;Q9P1W8-4Sample_20911.23278
298349Q9UGM5;Q9UGM5-2 Sample_20913.60756
298355Q9UI40;Q9UI40-2 Sample_20918.03998
298357Q9UIW2 Sample_20920.47834
298358Q9UJ14 Sample_20917.15138
298377Q9UMX0;Q9UMX0-2;Q9UMX0-4Sample_20922.33701
298384Q9UP79 Sample_20918.12435
298388Q9Y240 Sample_20917.32832
298398Q9Y5Y7 Sample_20918.37891

Check the dimensions of the long-format data frame

Hide code cell source

# Show the dimensions (rows, columns) of the long-format predictions.
dim(pred)
  1. 71601
  2. 3

Save predictions to disk

Hide code cell source

# Build the output path <folder_experiment>/preds/pred_all_<METHOD>.csv;
# the method name is upper-cased for the file name.
fname <- file.path(folder_experiment,
                   "preds",
                   paste0("pred_all_", toupper(method), ".csv"))
# Use `file`: the `path` argument of write_csv() is deprecated as of
# readr 1.4.0 and triggers a deprecation warning.
write_csv(pred, file = fname)
# Echo the path so it is visible in the cell output.
fname
Warning message:
“The `path` argument of `write_csv()` is deprecated as of readr 1.4.0.
 Please use the `file` argument instead.”
'runs/alzheimer_study/preds/pred_all_MLE.csv'