NAGuide R methods

NAGuide R methods#

Setup basic methods and packages used for all methods

  • BiocManager could be moved to the methods which are installed from Bioconductor

Hide code cell source

# options("install.lock"=FALSE)

# Packages that are attached up front (and installed first when missing).
packages_base_R <- c(
  "BiocManager",
  "reshape2",
  "data.table",
  "readr",
  "tibble"
)

#' Attach a package, installing it with install.packages() if needed
#'
#' @param pkg Package name as a single string.
#' @return Invisibly `NULL` when the package could be attached right away;
#'   otherwise the (invisible) result of `library()` after installation.
install_rpackage <- function(pkg) {
  is_attached <- require(pkg, character.only = TRUE)
  if (is_attached) {
    return(invisible(NULL))
  }
  # require() could not load the package: install it, then attach it.
  install.packages(pkg)
  library(pkg, character.only = TRUE)
}

#' Attach a package, installing it through BiocManager if needed
#'
#' Called by the large imputation function for the packages it installs
#' with BiocManager::install().
#'
#' @param pkg Package name as a single string.
#' @return Invisibly `NULL` when the package could be attached right away;
#'   otherwise the (invisible) result of `library()` after installation.
install_bioconductor <- function(pkg) {
  if (require(pkg, character.only = TRUE)) {
    return(invisible(NULL))
  }
  # require() could not load the package: install it, then attach it.
  BiocManager::install(pkg)
  library(pkg, character.only = TRUE)
}


# Attach every base dependency; install_rpackage() installs missing ones.
for (idx in seq_along(packages_base_R)) {
  install_rpackage(pkg = packages_base_R[[idx]])
}
Loading required package: BiocManager
Loading required package: reshape2
Loading required package: data.table

Attaching package: ‘data.table’
The following objects are masked from ‘package:reshape2’:

    dcast, melt
Loading required package: readr
Loading required package: tibble

Setup can be tricky… we try to integrate as much as possible into the conda environment.

Copied from NAGuideR’s github RShiny application. Adapted to run as standalone function in context of the Snakemake workflow.

  • df and df1 ?

  • seems quite hacky

  • code is only slightly adapted from repo to run here, mainly to install packages on the fly

Hide code cell source

#' Impute missing values with one of the methods collected by NAGuideR
#'
#' Adapted from the NAGuideR Shiny application so that it runs as a
#' standalone function. The packages a method needs are attached (and
#' installed on the fly when missing) only once that method is requested.
#' The methods "seqknn", "trknn", "gms" and "gsimp" additionally read files
#' below `src/R_NAGuideR/`, resolved relative to the working directory.
#'
#' @param x Table with missing values; coerced with `as.data.frame()`.
#'   The GSimp branch treats rows as features and columns as samples.
#' @param method Single string selecting the imputation method
#'   (case-insensitive). One of: zero, minimum, colmedian, rowmedian,
#'   knn_impute, seqknn, bpca, svdmethod, lls, mle, qrilc, mindet, minprob,
#'   irm, impseq, impseqrob, mice-norm, mice-cart, trknn, rf, pi, gms,
#'   msimpute, msimpute_mnar, gsimp.
#' @return A data.frame with the imputed data.
#' @details Stops with an error if `method` is not a single non-missing
#'   string or names an unsupported method.
nafunctions <- function(x, method = "zero") {
  stopifnot(is.character(method), length(method) == 1L, !is.na(method))

  # `df` receives the imputed result, `df1` keeps the untouched input.
  df <- df1 <- as.data.frame(x)
  method <- tolower(method)

  # imputeLCMD together with the packages installed alongside it.
  attach_imputelcmd <- function() {
    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage("gmm")
    install_rpackage("imputeLCMD")
  }

  # Run mice with the given elementary method and average the completed
  # data sets of all multiple imputations.
  mice_average <- function(mice_method) {
    install_rpackage("mice")
    n_imputations <- 5
    fitted <- mice(df1,
                   m = n_imputations,
                   seed = 1234,
                   method = mice_method)
    total <- 0
    for (i in seq_len(n_imputations)) {
      total <- complete(fitted, action = i) + total
    }
    averaged <- total / n_imputations
    rownames(averaged) <- rownames(df1)
    averaged
  }

  if (method == "zero") {
    df[is.na(df)] <- 0
  } else if (method == "minimum") {
    # global minimum over all observed values
    df[is.na(df)] <- min(df1, na.rm = TRUE)
  } else if (method == "colmedian") {
    install_rpackage("e1071")
    df <- impute(df1, what = "median")
  } else if (method == "rowmedian") {
    install_rpackage("e1071")
    df <- t(impute(t(df1), what = "median"))
  } else if (method == "knn_impute") {
    install_bioconductor("impute")
    knn_fit <-
      impute.knn(as.matrix(df1),
                 k = 10,
                 rowmax = 1,
                 colmax = 1) # rowmax = 0.9, colmax = 0.9
    df <- knn_fit$data
  } else if (method == "seqknn") {
    # SeqKnn is installed from the source tarball shipped with the project
    if (!require(SeqKnn)) {
      install.packages("src/R_NAGuideR/SeqKnn_1.0.1.tar.gz",
                       repos = NULL,
                       type = "source")
      library(SeqKnn)
    }
    df <- SeqKNN(df1, k = 10)
  } else if (method == "bpca") {
    install_bioconductor("pcaMethods")
    pca_fit <-
      pcaMethods::pca(
        as.matrix(df1),
        nPcs = ncol(df1) - 1,
        method = "bpca",
        maxSteps = 100
      )
    df <- completeObs(pca_fit)
  } else if (method == "svdmethod") {
    install_bioconductor("pcaMethods")
    pca_fit <-
      pcaMethods::pca(as.matrix(df1),
                      nPcs = ncol(df1) - 1,
                      method = "svdImpute")
    df <- completeObs(pca_fit)
  } else if (method == "lls") {
    install_bioconductor("pcaMethods")
    lls_fit <- llsImpute(t(df1), k = 10)
    df <- t(completeObs(lls_fit))
  } else if (method == "mle") {
    install_rpackage("norm")
    xxm <- as.matrix(df1)
    ss <- norm::prelim.norm(xxm)
    thx <- norm::em.norm(ss)
    norm::rngseed(123)
    df <- norm::imp.norm(ss, thx, xxm)
  } else if (method == "qrilc") {
    attach_imputelcmd()
    # impute.QRILC is applied to the transposed table, result transposed back
    qrilc_imputed <-
      imputeLCMD::impute.QRILC(t(df1), tune.sigma = 1)[[1]]
    df <- t(qrilc_imputed)
  } else if (method == "mindet") {
    attach_imputelcmd()
    df <- imputeLCMD::impute.MinDet(as.matrix(df1), q = 0.01)
  } else if (method == "minprob") {
    attach_imputelcmd()
    df <-
      imputeLCMD::impute.MinProb(as.matrix(df1), q = 0.01, tune.sigma = 1)
  } else if (method == "irm") {
    install_rpackage("VIM")
    df <- irmi(df1, trace = TRUE, imp_var = FALSE)
    rownames(df) <- rownames(df1)
  } else if (method == "impseq") {
    install_rpackage("rrcovNA")
    df <- impSeq(df1)
  } else if (method == "impseqrob") {
    install_rpackage("rrcovNA")
    seq_fit <- impSeqRob(df1, alpha = 0.9)
    df <- seq_fit$x
  } else if (method == "mice-norm") {
    df <- mice_average("norm")
  } else if (method == "mice-cart") {
    df <- mice_average("cart")
  } else if (method == "trknn") {
    # imputeKNN is defined in the sourced file
    source('src/R_NAGuideR/Imput_funcs.r')
    # Alternative kept from the original application:
    # sim_trKNN_wrapper <- function(data) {
    #   result <- data %>% as.matrix %>% t %>% imputeKNN(., k=10, distance='truncation', perc=0) %>% t
    #   return(result)
    # }
    # df1x <- sim_trKNN_wrapper(t(df1))
    # df<-as.data.frame(t(df1x))
    df <-
      imputeKNN(as.matrix(df1),
                k = 10,
                distance = 'truncation',
                perc = 0)
    df <- as.data.frame(df)
  } else if (method == "rf") {
    install_rpackage("missForest")
    forest_fit <- missForest(
      t(df1),
      maxiter = 10,
      ntree = 20, # input$rfntrees
      mtry = floor(nrow(df1) ^ (1 / 3)),
      verbose = TRUE
    )
    df <- t(forest_fit$ximp)
  } else if (method == "pi") {
    # Per column: draw replacements from a normal distribution that is
    # shifted down and narrowed relative to the observed values.
    # Uses the global RNG state (no seed is set here).
    width <- 0.3 # input$piwidth
    downshift <- 1.8 # input$pidownshift
    # seq_len() keeps the loop from running on a table without columns
    for (i in seq_len(ncol(df1))) {
      column <- df1[[i]]
      is_missing <- is.na(column)
      if (any(is_missing)) {
        observed <- column[!is_missing]
        observed_sd <- sd(observed)
        column[is_missing] <- rnorm(
          sum(is_missing),
          mean = mean(observed) - downshift * observed_sd,
          sd = width * observed_sd
        )
        df[[i]] <- column
      }
    }
  }
  # else if(method=="grr"){
  #   library(DreamAI)
  #   df<-impute.RegImpute(data=as.matrix(df1), fillmethod = "row_mean", maxiter_RegImpute = 10,conv_nrmse = 1e-03)
  # }
  else if (method == "gms") {
    # install.packages('GMSimpute')
    # GMSimpute is installed from the source tarball shipped with the project
    if (!require(GMSimpute)) {
      install.packages(
        "src/R_NAGuideR/GMSimpute_0.0.1.1.tar.gz",
        repos = NULL,
        type = "source"
      )
      library(GMSimpute)
    }
    df <- GMS.Lasso(df1,
                    nfolds = 3,
                    log.scale = FALSE,
                    TS.Lasso = TRUE)
  } else if (method == "msimpute") {
    install_bioconductor("msImpute")
    df <- msImpute(as.matrix(df1),
                   method = 'v2')
    df <- as.data.frame(df)
  } else if (method == "msimpute_mnar") {
    install_bioconductor("msImpute")
    # all samples are assigned to one single group
    df <-
      msImpute(as.matrix(df1),
               method = 'v2-mnar',
               group = rep(1, ncol(df1)))
    df <- as.data.frame(df)
  } else if (method == "gsimp") {
    # Set the option for the duration of the call only and restore the
    # previous value on exit instead of leaking it to the session.
    previous_options <- options(stringsAsFactors = FALSE)
    on.exit(options(previous_options), add = TRUE)

    # dependencies, partly for the sourced file
    attach_imputelcmd()
    install_rpackage("magrittr")
    install_rpackage("glmnet")
    install_rpackage("abind")
    install_rpackage("foreach")
    install_rpackage("doParallel")
    source('src/R_NAGuideR/GSimp.R')

    # wrapper function with data pre-processing
    pre_processing_GS_wrapper <- function(data_raw_log) {
      # samples in rows, features in columns #
      # Initialization #
      data_raw_log_qrilc <- as.data.frame(data_raw_log) %>%
        impute.QRILC() %>% extract2(1)
      # Centralization and scaling #
      data_raw_log_qrilc_sc <-
        scale_recover(data_raw_log_qrilc, method = 'scale')
      # Data after centralization and scaling #
      data_raw_log_qrilc_sc_df <- data_raw_log_qrilc_sc[[1]]
      # Parameters for centralization and scaling (for scaling recovery) #
      data_raw_log_qrilc_sc_df_param <- data_raw_log_qrilc_sc[[2]]
      # NA position #
      NA_pos <- which(is.na(data_raw_log), arr.ind = TRUE)
      # NA introduced to log-scaled-initialized data #
      data_raw_log_sc <- data_raw_log_qrilc_sc_df
      data_raw_log_sc[NA_pos] <- NA
      # Feed initialized and missing data into GSimp imputation #
      result <-
        data_raw_log_sc %>% GS_impute(
          .,
          iters_each = 50,
          iters_all = 10,
          initial = data_raw_log_qrilc_sc_df,
          lo = -Inf,
          hi = 'min',
          n_cores = 1,
          imp_model = 'glmnet_pred'
        )
      data_imp_log_sc <- result$data_imp
      # Data recovery #
      data_imp <- data_imp_log_sc %>%
        scale_recover(., method = 'recover',
                      param_df = data_raw_log_qrilc_sc_df_param) %>%
        extract2(1)
      return(data_imp)
    }
    df <- t(df1) # samples in rows, features in columns
    df <- pre_processing_GS_wrapper(df)
    df <- t(df) # features in rows, samples in columns
  } else {
    stop("Unsupported method: ", method)
  }

  # several branches yield a matrix; always hand back a data.frame
  as.data.frame(df)
}

Parameters#

Choose one of the available methods. Some methods might fail for your dataset for unknown reasons (and the error won’t always be easy to understand)

method = 'ZERO'
method = 'MINIMUM'
method = 'COLMEDIAN'
method = 'ROWMEDIAN'
method = 'KNN_IMPUTE'
method = 'SEQKNN'
method = 'BPCA'
method = 'SVDMETHOD'
method = 'LLS'
method = 'MLE'
method = 'LLS'
method = 'QRILC'
method = 'MINDET'
method = 'MINPROB'
method = 'IRM'
method = 'IMPSEQ'
method = 'IMPSEQROB'
method = 'MICE-NORM'
method = 'MICE-CART'
method = 'RF'
method = 'PI'
method = 'GMS'
method = 'TRKNN'
method = 'MSIMPUTE'
method = 'MSIMPUTE_MNAR'
method = 'GSIMP'
# Default parameters pointing at the example run.
# train_split: CSV file that is read further below with utils::read.csv
# folder_experiment: root folder; predictions are written to its 'preds' subfolder
# method: imputation method handed to nafunctions()
train_split = 'runs/example/data/data_wide_sample_cols.csv' # test
folder_experiment = 'runs/example/'
method = 'KNN_IMPUTE'
# Parameters
# The assignments below overwrite the defaults above
# (presumably injected by the workflow when the notebook is executed).
train_split = "runs/alzheimer_study/data/data_wide_sample_cols.csv"
method = "ZERO"
folder_experiment = "runs/alzheimer_study"

Dump predictions#

# Load the wide table; the first CSV column supplies the row names.
df <- utils::read.csv(file = train_split,
                      header = TRUE,
                      row.names = 1,
                      stringsAsFactors = FALSE)
df
A data.frame: 1421 × 210
Sample_000Sample_001Sample_002Sample_003Sample_004Sample_005Sample_006Sample_007Sample_008Sample_009Sample_200Sample_201Sample_202Sample_203Sample_204Sample_205Sample_206Sample_207Sample_208Sample_209
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
A0A024QZX5;A0A087X1N8;P3523715.91237 NA16.1114916.1069715.6032115.8116115.4996615.2207515.98013 NA NA15.8626415.6559415.40126 NA15.6822515.7980415.7394915.47682 NA
A0A024R0T9;K7ER74;P0265516.8519416.87369 NA17.0315115.3305118.6140717.4091717.6839116.3862516.5897217.3095516.6148017.9533918.1993817.2791116.8863517.55400 NA16.7794817.26137
A0A024R3W6;A0A024R412;O60462;O60462-2;O60462-3;O60462-4;O60462-5;Q7LBX6;X5D2Q815.5704715.5189215.9353215.8018715.3752215.6242015.9118515.3849915.8944715.37538 NA15.9317914.8587115.12451 NA14.9100615.5996615.4689614.9949615.17487
A0A024R644;A0A0A0MRU5;A0A1B0GWI2;O7550316.4810816.3869416.4157716.9786216.6790615.9576416.2338916.4179316.2713416.2097015.9540216.0295916.4975316.5128416.5126916.4823215.9378216.8977316.1324616.23505
A0A075B6H717.30138 NA18.1748015.96322 NA18.31720 NA17.2139317.79403 NA NA16.4826214.8346713.5689314.48257 NA NA NA NA NA
A0A075B6H920.2464619.9411019.2509019.6279720.4498318.6856920.3472220.7960418.9952319.24667 NA NA18.2698619.8006220.1831517.7048518.1545018.6363114.9075417.89344
A0A075B6I016.7644618.7862416.8321717.8520918.6818517.8654318.2499520.3093617.9784319.4104718.0055018.5414217.8074819.8986519.6742017.0394518.1517917.95040 NA17.74443
A0A075B6I117.5835717.1438815.6707318.8774917.0813114.8236918.5196919.3564018.5644917.9631016.4140217.5051014.0191117.5057420.25111 NA16.5026716.32073 NA16.37054
A0A075B6I616.98778 NA17.0118314.1815314.1402117.0417917.3756016.9979417.2031116.7583915.7254317.0675816.4584916.3277816.3337216.4130416.8600316.4007116.1186815.77958
A0A075B6I920.0541619.0673318.5690618.9850619.6856419.3758019.3988520.5435918.8439718.8997521.8526320.1465019.5345919.8671320.7662619.1021118.5379618.8489718.3677618.80581
A0A075B6J9 NA16.18799 NA13.4381314.4948415.9087516.18436 NA16.1701916.32362 NA NA16.6319317.4357419.7781116.06423 NA17.58044 NA16.53239
A0A075B6K416.1485016.1272715.3867216.5650216.4177116.45498 NA17.0467716.0321016.1490818.5816218.5173716.4835416.0633817.1912315.3503316.58173 NA NA16.33787
A0A075B6K517.3426117.4172217.2363316.2671717.3902018.3360417.2176518.3248817.5245818.7760819.5156618.9890617.8061416.7481918.4125417.1538417.9018417.5189917.5022016.98874
A0A075B6P5;P01615 NA NA NA16.9896517.4933018.2692317.7898918.1533417.7870318.3521319.92434 NA NA19.8413619.2662017.1752819.5750619.3058818.9765118.68810
A0A075B6Q5 NA NA15.12836 NA NA NA NA16.73949 NA NA NA17.0259816.1028715.87972 NA NA NA16.88355 NA15.68019
A0A075B6R2 NA NA16.2798116.7774417.4974417.0539616.9177019.7722216.7370517.9567017.9982218.1213119.19184 NA17.2858916.5977717.0704416.06459 NA16.03648
A0A075B6R9;A0A0C4DH6819.5164019.37284 NA19.0092319.5414719.5671920.1850720.5645419.5232419.3871620.1274920.3736019.4998318.9773920.5883218.0733718.8467319.6205419.3392620.39256
A0A075B6S215.5112416.2082414.8457015.23242 NA16.2865314.65632 NA14.5121216.5692515.0742315.9130416.6596315.8011717.4537814.1015316.6121316.2198415.6958216.38320
A0A075B6S5 NA NA NA NA NA15.9601616.1527717.0784216.8465916.3338515.5131516.14929 NA15.3513315.9116115.6517016.03424 NA NA NA
A0A075B6S916.5576517.5159717.9399916.9514717.0879517.3609916.4210319.7908017.1339915.4731717.2595820.5021819.2650718.4246818.1972516.1456319.5037318.4250117.7519016.21995
A0A075B7B814.91594 NA14.65865 NA15.3488614.5011115.1219614.5114215.31218 NA NA NA NA NA NA NA NA NA NA NA
A0A075B7D018.35754 NA18.2301819.2610319.7863719.6555118.5159320.5000819.5057220.00935 NA19.23066 NA18.0771119.4352819.0449817.0216318.0826218.7376717.31493
A0A087WSV8;V9HW7515.6758714.50492 NA15.5530115.52087 NA15.21659 NA13.85270 NA NA15.5257615.2093415.5381413.9836015.1491715.89044 NA15.4187214.79854
A0A087WSY4 NA17.1499315.65052 NA NA NA NA NA NA NA15.85031 NA16.2078115.33813 NA15.12749 NA NA NA20.34338
A0A087WSY5;Q96IY417.8632318.0413017.7766818.3417818.4288817.8513117.6895319.1298418.0186818.2319617.8611717.1273117.2878418.3573418.0921317.1149617.1954117.9442618.0847817.34736
A0A087WSY620.1168620.2745019.2808019.7819019.7265820.7958019.6323920.5975919.9661018.7243921.2815519.5135919.2447419.7366319.37543 NA19.7985518.6509218.4342219.15982
A0A087WSZ0;A0A0G2JQJ016.68463 NA NA NA NA13.03775 NA15.2586915.5957813.63975 NA15.9783514.31120 NA17.3274714.8897215.6365813.7425916.4765213.66795
A0A087WTA1;A0A0A0MR20;E9PDN6;F5H107;Q9C0A0;Q9C0A0-217.5937718.0124017.8292118.2685017.5570617.3805117.3108216.4085917.3750618.0290816.9424917.74655 NA17.69916 NA18.0800017.4805717.8153518.1862417.91903
A0A087WTA8;P0812317.6190818.2168517.5444517.5022517.8028816.9918417.9773917.7099417.5321317.3504517.5621916.5727916.90066 NA NA16.9340216.74856 NA17.1775717.26857
A0A087WTK0;A0A087WVC6;Q1291315.5012115.3069615.8147115.5515615.3213116.0441516.18863 NA15.5648615.37830 NA15.7602915.21911 NA NA NA15.3939615.24665 NA NA
Q9UN70;Q9UN70-216.1573415.7245516.4318216.2305215.95639 NA16.4384315.6314615.7212615.74455 NA15.6839115.4991315.9858515.0297915.4857115.5279816.0801215.6316515.77433
Q9UNN817.5605617.3015917.76464 NA17.1159118.1904217.9768217.0015917.09973 NA16.77195 NA17.7796717.5903017.3782116.4571316.4507517.8933217.6361817.39555
Q9UNW116.9599716.5129016.14734 NA16.5081815.6471416.4181416.4868116.3143817.10669 NA16.21768 NA16.06726 NA16.3173416.2100416.5703716.1786816.53072
Q9UP7915.15310 NA16.0362115.4160915.5027716.3383315.8980214.59940 NA15.94205 NA NA15.08015 NA15.2610415.73977 NA NA16.10664 NA
Q9UPU318.3254018.23025 NA18.1004018.1773818.4628817.9007617.7512118.7432117.7462416.7844818.4704719.0300518.6485618.1108918.4219618.3693717.6405917.2421018.27864
Q9UQ5215.3490115.5498915.71187 NA15.6280915.5390215.4003114.5504515.0872415.2123315.8143715.6499415.60246 NA15.0466615.7163716.1233015.6857115.8348516.18904
Q9UQM716.7798316.6355716.9280017.3646817.2613916.5729216.34126 NA17.18529 NA15.4493817.1391517.4659815.9544616.0165117.9082516.2232617.0897316.3884517.38407
Q9Y24016.2570016.2715416.5778017.13331 NA16.7610916.9081016.8798015.2639316.6680716.7483116.5384216.9056716.7234616.6139616.7144216.2557216.5080217.28611 NA
Q9Y279;Q9Y279-217.1679417.4455617.3634517.2928917.2204716.9875417.17998 NA17.1630416.5564115.0289316.5458916.5845016.5875517.6596316.6165616.70633 NA NA16.95138
Q9Y281;Q9Y281-3 NA NA NA NA NA NA NA NA NA NA NA12.30069 NA NA NA13.0103313.07666 NA NA12.54090
Q9Y28717.6628917.58819 NA17.6961417.7354117.0309416.9508318.26680 NA16.89611 NA NA16.5786617.9707417.1072818.2760816.4771616.5435416.1737817.82544
Q9Y2I2;Q9Y2I2-1;Q9Y2I2-2;Q9Y2I2-4;Q9Y2I2-5;Q9Y2I2-616.9277617.2207417.4259517.2986117.8281616.7418116.7206915.9049916.3961117.3693516.2202616.7063517.8930417.7065917.09718 NA16.9573018.3758717.5733317.93369
Q9Y2T3;Q9Y2T3-318.4676717.7211318.4401218.8656518.7322618.63877 NA17.1155719.3320318.0933816.5274618.1635417.6509217.5202617.4169318.6247718.2610718.5141918.0065017.90217
Q9Y490 NA13.96193 NA NA13.6269013.3812113.74195 NA NA13.37095 NA NA NA NA NA12.9323313.29442 NA13.8408613.52534
Q9Y4L118.5979518.47622 NA18.5596418.3046518.5237918.3602117.8747118.6725818.4313117.1867318.4140618.2671018.0855717.5979218.2899117.9772018.1487517.8805518.12515
Q9Y5F6;Q9Y5F6-216.4692915.78240 NA16.5289916.2848016.8634415.8995316.1095217.4013216.3436715.4592616.4288416.1595015.6315614.7356915.9682116.8848115.8777215.5542416.57524
Q9Y5I4;Q9Y5I4-217.1870117.4471717.4104317.5451417.2966717.3972817.1453216.4048217.3230817.4575315.8973117.13489 NA17.0858815.8738617.1035417.1090216.9384017.1550816.77629
Q9Y5Y718.8395719.1949519.0882018.7149718.6682419.0513319.0871118.8024518.7143918.7874517.1345217.8372718.7856218.6136718.7374718.7263918.4601119.50151 NA NA
Q9Y61716.8589716.7992716.2878217.07490 NA16.4995216.7026415.9239816.6377516.4427714.5826314.9398815.8697015.7117715.4650715.8083115.0352916.2830615.9196015.71292
Q9Y64619.3219619.1902919.7019419.7601319.6236919.2862819.6221919.0164419.1501219.5165219.8615620.2448419.2073120.3028119.4350719.8938820.0145720.3061420.2034220.04164
Q9Y653;Q9Y653-2;Q9Y653-316.0124315.5279415.2287115.4948414.7566815.0856315.78890 NA15.63408 NA NA14.9520115.5336115.0601415.66794 NA15.4222815.8079615.1574015.23671
Q9Y69615.1777815.5755314.7277014.59034 NA14.8599516.1363214.9492815.2656814.63326 NA15.8447915.8078416.1657415.9144815.6838716.1063316.0978016.7117615.65174
Q9Y6C2 NA NA13.7565814.68157 NA NA NA14.31805 NA NA NA NA NA NA14.2040714.23561 NA14.40272 NA15.21141
Q9Y6N615.0502114.8329015.1182815.1401815.2558814.69813 NA NA15.29271 NA14.7428215.1248215.2456815.2850215.0246115.4150415.3453615.7153214.6398314.20491
Q9Y6N7;Q9Y6N7-2;Q9Y6N7-416.8422116.5973917.4395217.3560817.0749517.0880316.71735 NA17.4531916.91617 NA17.0342516.7477916.39020 NA17.5509217.08447 NA16.5327616.74913
Q9Y6R7 NA20.2993319.5978519.4288019.5819819.1303318.6903018.9960519.9931818.5503419.2330418.8163618.9021218.81282 NA17.9218518.7081418.7247819.4111319.27470
Q9Y6X5 NA15.5555815.73522 NA15.32828 NA NA NA NA NA15.8946915.5618916.0887715.6691115.0119316.34016 NA16.1382715.8069015.73176
Q9Y6Y8;Q9Y6Y8-219.5628219.3857520.4473220.21596 NA19.6331920.0568618.6804420.0232319.94839 NA19.8094820.2043619.4182317.8474319.9276619.4326019.5991219.5450019.57720
Q9Y6Y9 NA13.9699712.63641 NA13.14453 NA12.8174112.89658 NA13.68470 NA NA12.7073812.9781212.2875012.92941 NA13.63705 NA11.04203
S4R3U612.8052112.4424512.5046612.44461 NA NA NA NA13.00793 NA NA10.13265 NA10.4976910.56335 NA NA11.17371 NA11.79130
  • data.frame does not allow arbitrary column names, but only valid column names…

  • tibbles don’t support rownames, and the imputation methods rely on normal data.frames. Save the header row for later use.

# Recover the unmodified column names from the CSV header. Only one data row
# is parsed, and the column-specification message is silenced as readr itself
# suggests.
original_header <- colnames(readr::read_csv(
  train_split,
  n_max = 1,
  col_names = TRUE,
  show_col_types = FALSE
))
# The first header entry names the feature (row identifier) column.
feat_name <- original_header[1]
# head() does not pad with NA when the file has fewer than five columns.
head(original_header, 5)
Rows: 1 Columns: 211
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr   (1): protein groups
dbl (180): Sample_000, Sample_002, Sample_003, Sample_004, Sample_005, Sampl...
lgl  (30): Sample_001, Sample_009, Sample_012, Sample_015, Sample_017, Sampl...

 Use `spec()` to retrieve the full column specification for this data.
 Specify the column types or set `show_col_types = FALSE` to quiet this message.
  1. 'protein groups'
  2. 'Sample_000'
  3. 'Sample_001'
  4. 'Sample_002'
  5. 'Sample_003'

Uncomment to test certain methods (only for debugging, as at least one method per package is tested using Github Actions)

Hide code cell source

# to_test <- c(
# 'ZERO',
# 'MINIMUM',
# 'COLMEDIAN',
# 'ROWMEDIAN',
# 'KNN_IMPUTE',
# 'SEQKNN',
# 'BPCA',
# 'SVDMETHOD',
# 'LLS',
# 'MLE',
# 'LLS',
# 'QRILC',
# 'MINDET',
# 'MINPROB',
# 'IRM',
# 'IMPSEQ',
# 'IMPSEQROB',
# 'MICE-NORM',
# 'MICE-CART',
# 'RF',
# 'PI',
# 'GMS', # fails to install on Windows
# 'TRKNN',
# 'MSIMPUTE',
# 'MSIMPUTE_MNAR',
# 'GSIMP'
# )

# for (method in to_test) {
#     print(method)
#     pred <- nafunctions(df, method)
# }

Impute and save predictions with original feature and column names

# Impute, then move the row names into a leading column so that the result
# can be stored as a tibble (tibbles do not support row names).
pred <- nafunctions(df, method)
feature_ids <- rownames(pred)
pred <- tibble::as_tibble(cbind(feature_ids, pred))
# Restore the header exactly as it appears in the input file.
names(pred) <- original_header
pred
A tibble: 1421 × 211
protein groupsSample_000Sample_001Sample_002Sample_003Sample_004Sample_005Sample_006Sample_007Sample_008Sample_200Sample_201Sample_202Sample_203Sample_204Sample_205Sample_206Sample_207Sample_208Sample_209
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
A0A024QZX5;A0A087X1N8;P35237 15.91237 0.0000016.1114916.1069715.6032115.8116115.4996615.2207515.98013 0.0000015.8626415.6559415.40126 0.0000015.6822515.7980415.7394915.47682 0.00000
A0A024R0T9;K7ER74;P02655 16.8519416.87369 0.0000017.0315115.3305118.6140717.4091717.6839116.3862517.3095516.6148017.9533918.1993817.2791116.8863517.55400 0.0000016.7794817.26137
A0A024R3W6;A0A024R412;O60462;O60462-2;O60462-3;O60462-4;O60462-5;Q7LBX6;X5D2Q815.5704715.5189215.9353215.8018715.3752215.6242015.9118515.3849915.89447 0.0000015.9317914.8587115.12451 0.0000014.9100615.5996615.4689614.9949615.17487
A0A024R644;A0A0A0MRU5;A0A1B0GWI2;O75503 16.4810816.3869416.4157716.9786216.6790615.9576416.2338916.4179316.2713415.9540216.0295916.4975316.5128416.5126916.4823215.9378216.8977316.1324616.23505
A0A075B6H7 17.30138 0.0000018.1748015.96322 0.0000018.31720 0.0000017.2139317.79403 0.0000016.4826214.8346713.5689314.48257 0.00000 0.00000 0.00000 0.00000 0.00000
A0A075B6H9 20.2464619.9411019.2509019.6279720.4498318.6856920.3472220.7960418.99523 0.00000 0.0000018.2698619.8006220.1831517.7048518.1545018.6363114.9075417.89344
A0A075B6I0 16.7644618.7862416.8321717.8520918.6818517.8654318.2499520.3093617.9784318.0055018.5414217.8074819.8986519.6742017.0394518.1517917.95040 0.0000017.74443
A0A075B6I1 17.5835717.1438815.6707318.8774917.0813114.8236918.5196919.3564018.5644916.4140217.5051014.0191117.5057420.25111 0.0000016.5026716.32073 0.0000016.37054
A0A075B6I6 16.98778 0.0000017.0118314.1815314.1402117.0417917.3756016.9979417.2031115.7254317.0675816.4584916.3277816.3337216.4130416.8600316.4007116.1186815.77958
A0A075B6I9 20.0541619.0673318.5690618.9850619.6856419.3758019.3988520.5435918.8439721.8526320.1465019.5345919.8671320.7662619.1021118.5379618.8489718.3677618.80581
A0A075B6J9 0.0000016.18799 0.0000013.4381314.4948415.9087516.18436 0.0000016.17019 0.00000 0.0000016.6319317.4357419.7781116.06423 0.0000017.58044 0.0000016.53239
A0A075B6K4 16.1485016.1272715.3867216.5650216.4177116.45498 0.0000017.0467716.0321018.5816218.5173716.4835416.0633817.1912315.3503316.58173 0.00000 0.0000016.33787
A0A075B6K5 17.3426117.4172217.2363316.2671717.3902018.3360417.2176518.3248817.5245819.5156618.9890617.8061416.7481918.4125417.1538417.9018417.5189917.5022016.98874
A0A075B6P5;P01615 0.00000 0.00000 0.0000016.9896517.4933018.2692317.7898918.1533417.7870319.92434 0.00000 0.0000019.8413619.2662017.1752819.5750619.3058818.9765118.68810
A0A075B6Q5 0.00000 0.0000015.12836 0.00000 0.00000 0.00000 0.0000016.73949 0.00000 0.0000017.0259816.1028715.87972 0.00000 0.00000 0.0000016.88355 0.0000015.68019
A0A075B6R2 0.00000 0.0000016.2798116.7774417.4974417.0539616.9177019.7722216.7370517.9982218.1213119.19184 0.0000017.2858916.5977717.0704416.06459 0.0000016.03648
A0A075B6R9;A0A0C4DH68 19.5164019.37284 0.0000019.0092319.5414719.5671920.1850720.5645419.5232420.1274920.3736019.4998318.9773920.5883218.0733718.8467319.6205419.3392620.39256
A0A075B6S2 15.5112416.2082414.8457015.23242 0.0000016.2865314.65632 0.0000014.5121215.0742315.9130416.6596315.8011717.4537814.1015316.6121316.2198415.6958216.38320
A0A075B6S5 0.00000 0.00000 0.00000 0.00000 0.0000015.9601616.1527717.0784216.8465915.5131516.14929 0.0000015.3513315.9116115.6517016.03424 0.00000 0.00000 0.00000
A0A075B6S9 16.5576517.5159717.9399916.9514717.0879517.3609916.4210319.7908017.1339917.2595820.5021819.2650718.4246818.1972516.1456319.5037318.4250117.7519016.21995
A0A075B7B8 14.91594 0.0000014.65865 0.0000015.3488614.5011115.1219614.5114215.31218 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
A0A075B7D0 18.35754 0.0000018.2301819.2610319.7863719.6555118.5159320.5000819.50572 0.0000019.23066 0.0000018.0771119.4352819.0449817.0216318.0826218.7376717.31493
A0A087WSV8;V9HW75 15.6758714.50492 0.0000015.5530115.52087 0.0000015.21659 0.0000013.85270 0.0000015.5257615.2093415.5381413.9836015.1491715.89044 0.0000015.4187214.79854
A0A087WSY4 0.0000017.1499315.65052 0.00000 0.00000 0.00000 0.00000 0.00000 0.0000015.85031 0.0000016.2078115.33813 0.0000015.12749 0.00000 0.00000 0.0000020.34338
A0A087WSY5;Q96IY4 17.8632318.0413017.7766818.3417818.4288817.8513117.6895319.1298418.0186817.8611717.1273117.2878418.3573418.0921317.1149617.1954117.9442618.0847817.34736
A0A087WSY6 20.1168620.2745019.2808019.7819019.7265820.7958019.6323920.5975919.9661021.2815519.5135919.2447419.7366319.37543 0.0000019.7985518.6509218.4342219.15982
A0A087WSZ0;A0A0G2JQJ0 16.68463 0.00000 0.00000 0.00000 0.0000013.03775 0.0000015.2586915.59578 0.0000015.9783514.31120 0.0000017.3274714.8897215.6365813.7425916.4765213.66795
A0A087WTA1;A0A0A0MR20;E9PDN6;F5H107;Q9C0A0;Q9C0A0-2 17.5937718.0124017.8292118.2685017.5570617.3805117.3108216.4085917.3750616.9424917.74655 0.0000017.69916 0.0000018.0800017.4805717.8153518.1862417.91903
A0A087WTA8;P08123 17.6190818.2168517.5444517.5022517.8028816.9918417.9773917.7099417.5321317.5621916.5727916.90066 0.00000 0.0000016.9340216.74856 0.0000017.1775717.26857
A0A087WTK0;A0A087WVC6;Q12913 15.5012115.3069615.8147115.5515615.3213116.0441516.18863 0.0000015.56486 0.0000015.7602915.21911 0.00000 0.00000 0.0000015.3939615.24665 0.00000 0.00000
Q9UN70;Q9UN70-2 16.1573415.7245516.4318216.2305215.95639 0.0000016.4384315.6314615.72126 0.0000015.6839115.4991315.9858515.0297915.4857115.5279816.0801215.6316515.77433
Q9UNN8 17.5605617.3015917.76464 0.0000017.1159118.1904217.9768217.0015917.0997316.77195 0.0000017.7796717.5903017.3782116.4571316.4507517.8933217.6361817.39555
Q9UNW1 16.9599716.5129016.14734 0.0000016.5081815.6471416.4181416.4868116.31438 0.0000016.21768 0.0000016.06726 0.0000016.3173416.2100416.5703716.1786816.53072
Q9UP79 15.15310 0.0000016.0362115.4160915.5027716.3383315.8980214.59940 0.00000 0.00000 0.0000015.08015 0.0000015.2610415.73977 0.00000 0.0000016.10664 0.00000
Q9UPU3 18.3254018.23025 0.0000018.1004018.1773818.4628817.9007617.7512118.7432116.7844818.4704719.0300518.6485618.1108918.4219618.3693717.6405917.2421018.27864
Q9UQ52 15.3490115.5498915.71187 0.0000015.6280915.5390215.4003114.5504515.0872415.8143715.6499415.60246 0.0000015.0466615.7163716.1233015.6857115.8348516.18904
Q9UQM7 16.7798316.6355716.9280017.3646817.2613916.5729216.34126 0.0000017.1852915.4493817.1391517.4659815.9544616.0165117.9082516.2232617.0897316.3884517.38407
Q9Y240 16.2570016.2715416.5778017.13331 0.0000016.7610916.9081016.8798015.2639316.7483116.5384216.9056716.7234616.6139616.7144216.2557216.5080217.28611 0.00000
Q9Y279;Q9Y279-2 17.1679417.4455617.3634517.2928917.2204716.9875417.17998 0.0000017.1630415.0289316.5458916.5845016.5875517.6596316.6165616.70633 0.00000 0.0000016.95138
Q9Y281;Q9Y281-3 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.0000012.30069 0.00000 0.00000 0.0000013.0103313.07666 0.00000 0.0000012.54090
Q9Y287 17.6628917.58819 0.0000017.6961417.7354117.0309416.9508318.26680 0.00000 0.00000 0.0000016.5786617.9707417.1072818.2760816.4771616.5435416.1737817.82544
Q9Y2I2;Q9Y2I2-1;Q9Y2I2-2;Q9Y2I2-4;Q9Y2I2-5;Q9Y2I2-616.9277617.2207417.4259517.2986117.8281616.7418116.7206915.9049916.3961116.2202616.7063517.8930417.7065917.09718 0.0000016.9573018.3758717.5733317.93369
Q9Y2T3;Q9Y2T3-3 18.4676717.7211318.4401218.8656518.7322618.63877 0.0000017.1155719.3320316.5274618.1635417.6509217.5202617.4169318.6247718.2610718.5141918.0065017.90217
Q9Y490 0.0000013.96193 0.00000 0.0000013.6269013.3812113.74195 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.0000012.9323313.29442 0.0000013.8408613.52534
Q9Y4L1 18.5979518.47622 0.0000018.5596418.3046518.5237918.3602117.8747118.6725817.1867318.4140618.2671018.0855717.5979218.2899117.9772018.1487517.8805518.12515
Q9Y5F6;Q9Y5F6-2 16.4692915.78240 0.0000016.5289916.2848016.8634415.8995316.1095217.4013215.4592616.4288416.1595015.6315614.7356915.9682116.8848115.8777215.5542416.57524
Q9Y5I4;Q9Y5I4-2 17.1870117.4471717.4104317.5451417.2966717.3972817.1453216.4048217.3230815.8973117.13489 0.0000017.0858815.8738617.1035417.1090216.9384017.1550816.77629
Q9Y5Y7 18.8395719.1949519.0882018.7149718.6682419.0513319.0871118.8024518.7143917.1345217.8372718.7856218.6136718.7374718.7263918.4601119.50151 0.00000 0.00000
Q9Y617 16.8589716.7992716.2878217.07490 0.0000016.4995216.7026415.9239816.6377514.5826314.9398815.8697015.7117715.4650715.8083115.0352916.2830615.9196015.71292
Q9Y646 19.3219619.1902919.7019419.7601319.6236919.2862819.6221919.0164419.1501219.8615620.2448419.2073120.3028119.4350719.8938820.0145720.3061420.2034220.04164
Q9Y653;Q9Y653-2;Q9Y653-3 16.0124315.5279415.2287115.4948414.7566815.0856315.78890 0.0000015.63408 0.0000014.9520115.5336115.0601415.66794 0.0000015.4222815.8079615.1574015.23671
Q9Y696 15.1777815.5755314.7277014.59034 0.0000014.8599516.1363214.9492815.26568 0.0000015.8447915.8078416.1657415.9144815.6838716.1063316.0978016.7117615.65174
Q9Y6C2 0.00000 0.0000013.7565814.68157 0.00000 0.00000 0.0000014.31805 0.00000 0.00000 0.00000 0.00000 0.0000014.2040714.23561 0.0000014.40272 0.0000015.21141
Q9Y6N6 15.0502114.8329015.1182815.1401815.2558814.69813 0.00000 0.0000015.2927114.7428215.1248215.2456815.2850215.0246115.4150415.3453615.7153214.6398314.20491
Q9Y6N7;Q9Y6N7-2;Q9Y6N7-4 16.8422116.5973917.4395217.3560817.0749517.0880316.71735 0.0000017.45319 0.0000017.0342516.7477916.39020 0.0000017.5509217.08447 0.0000016.5327616.74913
Q9Y6R7 0.0000020.2993319.5978519.4288019.5819819.1303318.6903018.9960519.9931819.2330418.8163618.9021218.81282 0.0000017.9218518.7081418.7247819.4111319.27470
Q9Y6X5 0.0000015.5555815.73522 0.0000015.32828 0.00000 0.00000 0.00000 0.0000015.8946915.5618916.0887715.6691115.0119316.34016 0.0000016.1382715.8069015.73176
Q9Y6Y8;Q9Y6Y8-2 19.5628219.3857520.4473220.21596 0.0000019.6331920.0568618.6804420.02323 0.0000019.8094820.2043619.4182317.8474319.9276619.4326019.5991219.5450019.57720
Q9Y6Y9 0.0000013.9699712.63641 0.0000013.14453 0.0000012.8174112.89658 0.00000 0.00000 0.0000012.7073812.9781212.2875012.92941 0.0000013.63705 0.0000011.04203
S4R3U6 12.8052112.4424512.5046612.44461 0.00000 0.00000 0.00000 0.0000013.00793 0.0000010.13265 0.0000010.4976910.56335 0.00000 0.0000011.17371 0.0000011.79130

Transform predictions to long format

# Long format: one row per (feature, sample) pair.
pred <- reshape2::melt(pred, id.vars = feat_name)
names(pred) <- c(feat_name, 'Sample ID', method)
# Keep only the entries that were missing in the input; the mask is melted
# the same way, so its rows line up with the rows of `pred`.
was_missing <- reshape2::melt(is.na(df))['value'] == TRUE
pred <- pred[was_missing, ]
pred
A data.frame: 71601 × 3
protein groupsSample IDZERO
<chr><fct><dbl>
11A0A075B6J9 Sample_0000
14A0A075B6P5;P01615 Sample_0000
15A0A075B6Q5 Sample_0000
16A0A075B6R2 Sample_0000
19A0A075B6S5 Sample_0000
24A0A087WSY4 Sample_0000
34A0A087WU43;A0A087WX17;A0A087WXI5;P12830;P12830-2 Sample_0000
39A0A087WW87;A0A087X0Q4;P01614 Sample_0000
40A0A087WWA5 Sample_0000
42A0A087WWT2;Q9NPD7 Sample_0000
43A0A087WX80;P24043 Sample_0000
45A0A087WXE9;E9PQ70;Q6UXH9;Q6UXH9-2;Q6UXH9-3 Sample_0000
48A0A087WYK9;Q02985;Q02985-2;Q6NSD3 Sample_0000
52A0A087WZR4 Sample_0000
53A0A087X089;Q16627;Q16627-2 Sample_0000
56A0A087X0M8 Sample_0000
59A0A087X117;A0A0G2JN29;J3KN36;P69849;Q15155;Q5JPE7;Q5JPE7-2 Sample_0000
69A0A0A0MQS9;A0A0A0MTC7;Q16363;Q16363-2 Sample_0000
78A0A0A0MS20;A0A0A0MSZ8;A0A0G2JM38;A0A0G2JM43;A0A0G2JM57;A0A0G2JM84;A0A0G2JMH7;A0A0G2JML1;A0A0G2JNE9;A0A0G2JNL1;A0A0G2JP25;A0A0G2JP84;A0A0G2JPA9;A0A0G2JPC7;A0A0G2JPU4;A0A0G2JPX5;A0A0G2JQ10;A0A0G2JQ20;A8MUE1;C9JST2;Q8NHJ6;Q8NHJ6-2;Q8NHJ6-3Sample_0000
85A0A0A0MT32;P38571;P38571-2 Sample_0000
87A0A0A0MT66 Sample_0000
90A0A0A0MTC8;Q9NQ36;Q9NQ36-2;Q9NQ36-3 Sample_0000
96A0A0A6YY99;O43508-2 Sample_0000
101A0A0B4J1V2 Sample_0000
102A0A0B4J1V6 Sample_0000
105A0A0B4J1Z1;C9JAB2;Q16629;Q16629-2;Q16629-3;Q16629-4 Sample_0000
108A0A0B4J2B5;S4R460 Sample_0000
109A0A0B4J2C3;P13693;Q5W0H4 Sample_0000
111A0A0C4DFP6;Q9NQ79;Q9NQ79-2 Sample_0000
113A0A0C4DG76;Q5JZ08;Q9BQS7;Q9BQS7-2;Q9BQS7-3;Q9BQS7-4 Sample_0000
298242Q96PQ0 Sample_2090
298244Q96RW7;Q96RW7-2 Sample_2090
298246Q96SM3 Sample_2090
298249Q99538 Sample_2090
298252Q99650;Q99650-2 Sample_2090
298261Q9BQT9;Q9BQT9-2 Sample_2090
298266Q9BT88 Sample_2090
298273Q9BX67 Sample_2090
298277Q9BXP8 Sample_2090
298287Q9H3T2;Q9H3T2-3 Sample_2090
298288Q9H3T3;Q9H3T3-3 Sample_2090
298289Q9H492;Q9H492-2 Sample_2090
298297Q9H8L6 Sample_2090
298299Q9HBR0 Sample_2090
298302Q9HC38-2 Sample_2090
298303Q9HC57 Sample_2090
298310Q9NPH3;Q9NPH3-2;Q9NPH3-5Sample_2090
298314Q9NRB3 Sample_2090
298316Q9NS00;Q9NS00-2 Sample_2090
298317Q9NS84 Sample_2090
298328Q9NY97;Q9NY97-2 Sample_2090
298341Q9P1W8;Q9P1W8-2;Q9P1W8-4Sample_2090
298349Q9UGM5;Q9UGM5-2 Sample_2090
298355Q9UI40;Q9UI40-2 Sample_2090
298357Q9UIW2 Sample_2090
298358Q9UJ14 Sample_2090
298377Q9UMX0;Q9UMX0-2;Q9UMX0-4Sample_2090
298384Q9UP79 Sample_2090
298388Q9Y240 Sample_2090
298398Q9Y5Y7 Sample_2090

Check dimension of long format dataframe

Hide code cell source

# Rows: entries that were missing in the input; columns: feature, sample, imputed value.
dim(pred)
  1. 71601
  2. 3

Save predictions to disk

Hide code cell source

# Target file: <folder_experiment>/preds/pred_all_<METHOD>.csv
fname <- file.path(folder_experiment,
                   "preds",
                   paste0("pred_all_", toupper(method), ".csv"))
# Make sure the output folder exists before writing.
dir.create(dirname(fname), recursive = TRUE, showWarnings = FALSE)
# `file` replaces the `path` argument, deprecated as of readr 1.4.0.
readr::write_csv(pred, file = fname)
fname
Warning message:
“The `path` argument of `write_csv()` is deprecated as of readr 1.4.0.
 Please use the `file` argument instead.”
'runs/alzheimer_study/preds/pred_all_ZERO.csv'