NAGuide R methods

NAGuide R methods#

Setup basic methods and packages used for all methods

  • BiocManager could be moved to the methods that are installed from Bioconductor

Hide code cell source

# options("install.lock"=FALSE)

# Packages needed by every method; attached (and installed if missing) below.
packages_base_R <- c(
  "BiocManager",
  "reshape2",
  "data.table",
  "readr",
  "tibble"
)

# Attach `pkg` (a package name given as a string). When attaching fails, the
# package is installed with install.packages() and then attached.
install_rpackage <- function(pkg) {
  is_attached <- require(pkg, character.only = TRUE)
  if (is_attached) {
    # Nothing left to do; same invisible NULL the plain `if` would yield.
    return(invisible(NULL))
  }
  install.packages(pkg)
  library(pkg, character.only = TRUE)
}

# Bioconductor counterpart of the CRAN installer: attach `pkg` (a package name
# given as a string) and, when attaching fails, install it through
# BiocManager::install() before attaching it.
# Used by the large imputation function.
install_bioconductor <- function(pkg) {
  is_attached <- require(pkg, character.only = TRUE)
  if (is_attached) {
    # Nothing left to do; same invisible NULL the plain `if` would yield.
    return(invisible(NULL))
  }
  BiocManager::install(pkg)
  library(pkg, character.only = TRUE)
}


# Attach each base package in turn; install_rpackage() installs any package
# that cannot be attached yet.
for (package in packages_base_R) {
  install_rpackage(package)
}
Loading required package: BiocManager
Loading required package: reshape2
Loading required package: data.table

Attaching package: ‘data.table’
The following objects are masked from ‘package:reshape2’:

    dcast, melt
Loading required package: readr
Loading required package: tibble

The setup can be tricky, so as much as possible is integrated into the conda environment.

Copied from NAGuideR’s github RShiny application. Adapted to run as standalone function in context of the Snakemake workflow.

  • df and df1 ?

  • seems quite hacky

  • code is only slightly adapted from repo to run here, mainly to install packages on the fly

Hide code cell source

#' Impute missing values with one of the NAGuideR methods
#'
#' Adapted from the NAGuideR Shiny application so it can run as a standalone
#' function. Packages required by the selected method are installed on the
#' fly (via `install_rpackage()` / `install_bioconductor()`) and attached.
#'
#' @param x Data coercible with `as.data.frame()`; missing entries are `NA`.
#'   NOTE(review): the "gsimp" branch treats rows as features and columns as
#'   samples; several other branches transpose before imputing - confirm the
#'   expected orientation against the caller.
#' @param method Method name, matched case-insensitively. One of "zero",
#'   "minimum", "colmedian", "rowmedian", "knn_impute", "seqknn", "bpca",
#'   "svdmethod", "lls", "mle", "qrilc", "mindet", "minprob", "irm", "impseq",
#'   "impseqrob", "mice-norm", "mice-cart", "trknn", "rf", "pi", "gms",
#'   "msimpute", "msimpute_mnar" or "gsimp".
#' @return A data.frame with the imputed data. Any other `method` raises an
#'   error.
nafunctions <- function(x, method = "zero") {
  df <- df1 <- as.data.frame(x)
  method <- tolower(method)

  # imputeLCMD together with the packages that were installed alongside it in
  # every branch that uses it (qrilc, mindet, minprob, gsimp).
  install_imputelcmd <- function() {
    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage("gmm")
    install_rpackage("imputeLCMD")
  }

  # Run mice with `mice_method` and return the element-wise mean of the `m`
  # completed data sets, carrying over the row names of the input.
  mice_average <- function(mice_method, m = 5) {
    install_rpackage("mice")
    imputed <- mice::mice(df1,
                          m = m,
                          seed = 1234,
                          method = mice_method)
    total <- 0
    for (i in seq_len(m)) {
      # Qualified so another attached package's `complete` cannot mask it.
      total <- mice::complete(imputed, action = i) + total
    }
    averaged <- total / m
    rownames(averaged) <- rownames(df1)
    averaged
  }

  if (method == "zero") {
    df[is.na(df)] <- 0
  } else if (method == "minimum") {
    # Global minimum over all observed values
    df[is.na(df)] <- min(df1, na.rm = TRUE)
  } else if (method == "colmedian") {
    install_rpackage("e1071")
    # Qualified: `impute` is a common function name in other packages.
    df <- e1071::impute(df1, what = "median")
  } else if (method == "rowmedian") {
    install_rpackage("e1071")
    # Transpose so that the column-wise median acts on the original rows.
    dfx <- e1071::impute(t(df1), what = "median")
    df <- t(dfx)
  } else if (method == "knn_impute") {
    install_bioconductor("impute")
    data_zero1 <-
      impute.knn(as.matrix(df1),
                 k = 10,
                 rowmax = 1,
                 colmax = 1) # rowmax = 0.9, colmax = 0.9
    df <- data_zero1$data
  } else if (method == "seqknn") {
    # Installed from the source tarball shipped with the workflow.
    if (!require(SeqKnn)) {
      install.packages("src/R_NAGuideR/SeqKnn_1.0.1.tar.gz",
                       repos = NULL,
                       type = "source")
      library(SeqKnn)
    }
    df <- SeqKNN(df1, k = 10)
  } else if (method == "bpca") {
    install_bioconductor("pcaMethods")
    data_zero1 <-
      pcaMethods::pca(
        as.matrix(df1),
        nPcs = ncol(df1) - 1,
        method = "bpca",
        maxSteps = 100
      )
    df <- completeObs(data_zero1)
  } else if (method == "svdmethod") {
    install_bioconductor("pcaMethods")
    data_zero1 <-
      pcaMethods::pca(as.matrix(df1),
                      nPcs = ncol(df1) - 1,
                      method = "svdImpute")
    df <- completeObs(data_zero1)
  } else if (method == "lls") {
    install_bioconductor("pcaMethods")
    data_zero1 <- llsImpute(t(df1), k = 10)
    df <- t(completeObs(data_zero1))
  } else if (method == "mle") {
    install_rpackage("norm")
    xxm <- as.matrix(df1)
    ss <- norm::prelim.norm(xxm)
    thx <- norm::em.norm(ss)
    # Fixed seed for the random draw in imp.norm
    norm::rngseed(123)
    df <- norm::imp.norm(ss, thx, xxm)
  } else if (method == "qrilc") {
    install_imputelcmd()
    xxm <- t(df1)
    data_zero1 <-
      imputeLCMD::impute.QRILC(xxm, tune.sigma = 1)[[1]]
    df <- t(data_zero1)
  } else if (method == "mindet") {
    install_imputelcmd()
    xxm <- as.matrix(df1)
    df <- imputeLCMD::impute.MinDet(xxm, q = 0.01)
  } else if (method == "minprob") {
    install_imputelcmd()
    xxm <- as.matrix(df1)
    df <-
      imputeLCMD::impute.MinProb(xxm, q = 0.01, tune.sigma = 1)
  } else if (method == "irm") {
    install_rpackage("VIM")
    df <- irmi(df1, trace = TRUE, imp_var = FALSE)
    rownames(df) <- rownames(df1)
  } else if (method == "impseq") {
    install_rpackage("rrcovNA")
    df <- impSeq(df1)
  } else if (method == "impseqrob") {
    install_rpackage("rrcovNA")
    data_zero1 <- impSeqRob(df1, alpha = 0.9)
    df <- data_zero1$x
  } else if (method == "mice-norm") {
    df <- mice_average("norm")
  } else if (method == "mice-cart") {
    df <- mice_average("cart")
  } else if (method == "trknn") {
    source('src/R_NAGuideR/Imput_funcs.r')
    # sim_trKNN_wrapper <- function(data) {
    #   result <- data %>% as.matrix %>% t %>% imputeKNN(., k=10, distance='truncation', perc=0) %>% t
    #   return(result)
    # }
    # df1x <- sim_trKNN_wrapper(t(df1))
    # df<-as.data.frame(t(df1x))
    df <-
      imputeKNN(as.matrix(df),
                k = 10,
                distance = 'truncation',
                perc = 0)
    df <- as.data.frame(df)
  } else if (method == "rf") {
    install_rpackage("missForest")
    data_zero1 <- missForest(
      t(df1),
      maxiter = 10,
      ntree = 20, # input$rfntrees
      mtry = floor(nrow(df1) ^ (1 / 3)),
      verbose = TRUE
    )
    df <- t(data_zero1$ximp)
  } else if (method == "pi") {
    # Per column: draw replacements from a normal distribution whose mean is
    # shifted down by `downshift` standard deviations of the observed values
    # and whose spread is `width` times their standard deviation.
    width <- 0.3 # input$piwidth
    downshift <- 1.8 # input$pidownshift
    # seq_len() so that zero-column input skips the loop instead of failing
    for (i in seq_len(ncol(df1))) {
      temp <- df1[[i]]
      is_missing <- is.na(temp)
      n.missing <- sum(is_missing)
      if (n.missing > 0) {
        observed <- temp[!is_missing]
        temp.sd <- width * sd(observed)
        temp.mean <- mean(observed) - downshift * sd(observed)
        temp[is_missing] <-
          rnorm(n.missing, mean = temp.mean, sd = temp.sd)
        df[[i]] <- temp
      }
    }
  }
  # else if(method=="grr"){
  #   library(DreamAI)
  #   df<-impute.RegImpute(data=as.matrix(df1), fillmethod = "row_mean", maxiter_RegImpute = 10,conv_nrmse = 1e-03)
  # }
  else if (method == "gms") {
    # install.packages('GMSimpute')
    # Installed from the source tarball shipped with the workflow.
    if (!require(GMSimpute)) {
      install.packages(
        "src/R_NAGuideR/GMSimpute_0.0.1.1.tar.gz",
        repos = NULL,
        type = "source"
      )

      library(GMSimpute)
    }

    df <- GMS.Lasso(df1,
                    nfolds = 3,
                    log.scale = FALSE,
                    TS.Lasso = TRUE)
  } else if (method == "msimpute") {
    install_bioconductor("msImpute")
    df <- msImpute(as.matrix(df),
                   method = 'v2')
    df <- as.data.frame(df)
  } else if (method == "msimpute_mnar") {
    install_bioconductor("msImpute")
    # All samples are assigned to one group.
    df <-
      msImpute(as.matrix(df),
               method = 'v2-mnar',
               group = rep(1, dim(df)[2]))
    df <- as.data.frame(df)
  } else if (method == "gsimp") {
    # Set the option only for the duration of this call; restore it on exit
    # so that the caller's session is left unchanged.
    old_options <- options(stringsAsFactors = FALSE)
    on.exit(options(old_options), add = TRUE)

    # dependencies, partly for the sourced file
    install_imputelcmd()
    install_rpackage("magrittr")
    install_rpackage("glmnet")
    install_rpackage("abind")
    install_rpackage("foreach")
    install_rpackage("doParallel")
    source('src/R_NAGuideR/GSimp.R')

    # wrapper function with data pre-processing
    pre_processing_GS_wrapper <- function(data_raw_log) {
      # samples in rows, features in columns #
      # Initialization #
      data_raw_log_qrilc <- as.data.frame(data_raw_log) %>%
        impute.QRILC() %>% extract2(1)
      # Centralization and scaling #
      data_raw_log_qrilc_sc <-
        scale_recover(data_raw_log_qrilc, method = 'scale')
      # Data after centralization and scaling #
      data_raw_log_qrilc_sc_df <- data_raw_log_qrilc_sc[[1]]
      # Parameters for centralization and scaling (for scaling recovery) #
      data_raw_log_qrilc_sc_df_param <- data_raw_log_qrilc_sc[[2]]
      # NA position #
      NA_pos <- which(is.na(data_raw_log), arr.ind = TRUE)
      # NA introduced to log-scaled-initialized data #
      data_raw_log_sc <- data_raw_log_qrilc_sc_df
      data_raw_log_sc[NA_pos] <- NA
      # Feed initialized and missing data into GSimp imputation #
      result <-
        data_raw_log_sc %>% GS_impute(
          .,
          iters_each = 50,
          iters_all = 10,
          initial = data_raw_log_qrilc_sc_df,
          lo = -Inf,
          hi = 'min',
          n_cores = 1,
          imp_model = 'glmnet_pred'
        )
      data_imp_log_sc <- result$data_imp
      # Data recovery #
      data_imp <- data_imp_log_sc %>%
        scale_recover(., method = 'recover',
                      param_df = data_raw_log_qrilc_sc_df_param) %>%
        extract2(1)
      return(data_imp)
    }
    df <- t(df) # samples in rows, features in columns
    df <- pre_processing_GS_wrapper(df)
    df <- t(df) # features in rows, samples in columns
  } else {
    stop(paste("Unsupported methods so far: ", method))
  }
  # Several branches produce a matrix; always hand back a data.frame.
  df <- as.data.frame(df)
  df
}

Parameters#

Choose one of the available methods. Some methods might fail for your dataset for unknown reasons (and the error won’t always be easy to understand)

method = 'ZERO'
method = 'MINIMUM'
method = 'COLMEDIAN'
method = 'ROWMEDIAN'
method = 'KNN_IMPUTE'
method = 'SEQKNN'
method = 'BPCA'
method = 'SVDMETHOD'
method = 'LLS'
method = 'MLE'
method = 'LLS'
method = 'QRILC'
method = 'MINDET'
method = 'MINPROB'
method = 'IRM'
method = 'IMPSEQ'
method = 'IMPSEQROB'
method = 'MICE-NORM'
method = 'MICE-CART'
method = 'RF'
method = 'PI'
method = 'GMS'
method = 'TRKNN'
method = 'MSIMPUTE'
method = 'MSIMPUTE_MNAR'
method = 'GSIMP'
# Example values; NOTE(review): presumably the defaults for interactive runs,
# overridden by the assignments below the "# Parameters" marker - confirm.
train_split <- "runs/example/data/data_wide_sample_cols.csv" # test
folder_experiment <- "runs/example/"
method <- "KNN_IMPUTE"
# Parameters
train_split <- "runs/alzheimer_study/data/data_wide_sample_cols.csv"
method <- "ROWMEDIAN"
folder_experiment <- "runs/alzheimer_study"

Dump predictions#

# Read the wide training split; the first column supplies the row names.
df <- utils::read.csv(
  file = train_split,
  header = TRUE,
  row.names = 1,
  stringsAsFactors = FALSE
)
df
A data.frame: 1421 × 210
Sample_000Sample_001Sample_002Sample_003Sample_004Sample_005Sample_006Sample_007Sample_008Sample_009Sample_200Sample_201Sample_202Sample_203Sample_204Sample_205Sample_206Sample_207Sample_208Sample_209
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
A0A024QZX5;A0A087X1N8;P3523715.91237 NA16.1114916.1069715.6032115.8116115.4996615.2207515.98013 NA NA15.8626415.6559415.40126 NA15.6822515.7980415.7394915.47682 NA
A0A024R0T9;K7ER74;P0265516.8519416.87369 NA17.0315115.3305118.6140717.4091717.6839116.3862516.5897217.3095516.6148017.9533918.1993817.2791116.8863517.55400 NA16.7794817.26137
A0A024R3W6;A0A024R412;O60462;O60462-2;O60462-3;O60462-4;O60462-5;Q7LBX6;X5D2Q815.5704715.5189215.9353215.8018715.3752215.6242015.9118515.3849915.8944715.37538 NA15.9317914.8587115.12451 NA14.9100615.5996615.4689614.9949615.17487
A0A024R644;A0A0A0MRU5;A0A1B0GWI2;O7550316.4810816.3869416.4157716.9786216.6790615.9576416.2338916.4179316.2713416.2097015.9540216.0295916.4975316.5128416.5126916.4823215.9378216.8977316.1324616.23505
A0A075B6H717.30138 NA18.1748015.96322 NA18.31720 NA17.2139317.79403 NA NA16.4826214.8346713.5689314.48257 NA NA NA NA NA
A0A075B6H920.2464619.9411019.2509019.6279720.4498318.6856920.3472220.7960418.9952319.24667 NA NA18.2698619.8006220.1831517.7048518.1545018.6363114.9075417.89344
A0A075B6I016.7644618.7862416.8321717.8520918.6818517.8654318.2499520.3093617.9784319.4104718.0055018.5414217.8074819.8986519.6742017.0394518.1517917.95040 NA17.74443
A0A075B6I117.5835717.1438815.6707318.8774917.0813114.8236918.5196919.3564018.5644917.9631016.4140217.5051014.0191117.5057420.25111 NA16.5026716.32073 NA16.37054
A0A075B6I616.98778 NA17.0118314.1815314.1402117.0417917.3756016.9979417.2031116.7583915.7254317.0675816.4584916.3277816.3337216.4130416.8600316.4007116.1186815.77958
A0A075B6I920.0541619.0673318.5690618.9850619.6856419.3758019.3988520.5435918.8439718.8997521.8526320.1465019.5345919.8671320.7662619.1021118.5379618.8489718.3677618.80581
A0A075B6J9 NA16.18799 NA13.4381314.4948415.9087516.18436 NA16.1701916.32362 NA NA16.6319317.4357419.7781116.06423 NA17.58044 NA16.53239
A0A075B6K416.1485016.1272715.3867216.5650216.4177116.45498 NA17.0467716.0321016.1490818.5816218.5173716.4835416.0633817.1912315.3503316.58173 NA NA16.33787
A0A075B6K517.3426117.4172217.2363316.2671717.3902018.3360417.2176518.3248817.5245818.7760819.5156618.9890617.8061416.7481918.4125417.1538417.9018417.5189917.5022016.98874
A0A075B6P5;P01615 NA NA NA16.9896517.4933018.2692317.7898918.1533417.7870318.3521319.92434 NA NA19.8413619.2662017.1752819.5750619.3058818.9765118.68810
A0A075B6Q5 NA NA15.12836 NA NA NA NA16.73949 NA NA NA17.0259816.1028715.87972 NA NA NA16.88355 NA15.68019
A0A075B6R2 NA NA16.2798116.7774417.4974417.0539616.9177019.7722216.7370517.9567017.9982218.1213119.19184 NA17.2858916.5977717.0704416.06459 NA16.03648
A0A075B6R9;A0A0C4DH6819.5164019.37284 NA19.0092319.5414719.5671920.1850720.5645419.5232419.3871620.1274920.3736019.4998318.9773920.5883218.0733718.8467319.6205419.3392620.39256
A0A075B6S215.5112416.2082414.8457015.23242 NA16.2865314.65632 NA14.5121216.5692515.0742315.9130416.6596315.8011717.4537814.1015316.6121316.2198415.6958216.38320
A0A075B6S5 NA NA NA NA NA15.9601616.1527717.0784216.8465916.3338515.5131516.14929 NA15.3513315.9116115.6517016.03424 NA NA NA
A0A075B6S916.5576517.5159717.9399916.9514717.0879517.3609916.4210319.7908017.1339915.4731717.2595820.5021819.2650718.4246818.1972516.1456319.5037318.4250117.7519016.21995
A0A075B7B814.91594 NA14.65865 NA15.3488614.5011115.1219614.5114215.31218 NA NA NA NA NA NA NA NA NA NA NA
A0A075B7D018.35754 NA18.2301819.2610319.7863719.6555118.5159320.5000819.5057220.00935 NA19.23066 NA18.0771119.4352819.0449817.0216318.0826218.7376717.31493
A0A087WSV8;V9HW7515.6758714.50492 NA15.5530115.52087 NA15.21659 NA13.85270 NA NA15.5257615.2093415.5381413.9836015.1491715.89044 NA15.4187214.79854
A0A087WSY4 NA17.1499315.65052 NA NA NA NA NA NA NA15.85031 NA16.2078115.33813 NA15.12749 NA NA NA20.34338
A0A087WSY5;Q96IY417.8632318.0413017.7766818.3417818.4288817.8513117.6895319.1298418.0186818.2319617.8611717.1273117.2878418.3573418.0921317.1149617.1954117.9442618.0847817.34736
A0A087WSY620.1168620.2745019.2808019.7819019.7265820.7958019.6323920.5975919.9661018.7243921.2815519.5135919.2447419.7366319.37543 NA19.7985518.6509218.4342219.15982
A0A087WSZ0;A0A0G2JQJ016.68463 NA NA NA NA13.03775 NA15.2586915.5957813.63975 NA15.9783514.31120 NA17.3274714.8897215.6365813.7425916.4765213.66795
A0A087WTA1;A0A0A0MR20;E9PDN6;F5H107;Q9C0A0;Q9C0A0-217.5937718.0124017.8292118.2685017.5570617.3805117.3108216.4085917.3750618.0290816.9424917.74655 NA17.69916 NA18.0800017.4805717.8153518.1862417.91903
A0A087WTA8;P0812317.6190818.2168517.5444517.5022517.8028816.9918417.9773917.7099417.5321317.3504517.5621916.5727916.90066 NA NA16.9340216.74856 NA17.1775717.26857
A0A087WTK0;A0A087WVC6;Q1291315.5012115.3069615.8147115.5515615.3213116.0441516.18863 NA15.5648615.37830 NA15.7602915.21911 NA NA NA15.3939615.24665 NA NA
Q9UN70;Q9UN70-216.1573415.7245516.4318216.2305215.95639 NA16.4384315.6314615.7212615.74455 NA15.6839115.4991315.9858515.0297915.4857115.5279816.0801215.6316515.77433
Q9UNN817.5605617.3015917.76464 NA17.1159118.1904217.9768217.0015917.09973 NA16.77195 NA17.7796717.5903017.3782116.4571316.4507517.8933217.6361817.39555
Q9UNW116.9599716.5129016.14734 NA16.5081815.6471416.4181416.4868116.3143817.10669 NA16.21768 NA16.06726 NA16.3173416.2100416.5703716.1786816.53072
Q9UP7915.15310 NA16.0362115.4160915.5027716.3383315.8980214.59940 NA15.94205 NA NA15.08015 NA15.2610415.73977 NA NA16.10664 NA
Q9UPU318.3254018.23025 NA18.1004018.1773818.4628817.9007617.7512118.7432117.7462416.7844818.4704719.0300518.6485618.1108918.4219618.3693717.6405917.2421018.27864
Q9UQ5215.3490115.5498915.71187 NA15.6280915.5390215.4003114.5504515.0872415.2123315.8143715.6499415.60246 NA15.0466615.7163716.1233015.6857115.8348516.18904
Q9UQM716.7798316.6355716.9280017.3646817.2613916.5729216.34126 NA17.18529 NA15.4493817.1391517.4659815.9544616.0165117.9082516.2232617.0897316.3884517.38407
Q9Y24016.2570016.2715416.5778017.13331 NA16.7610916.9081016.8798015.2639316.6680716.7483116.5384216.9056716.7234616.6139616.7144216.2557216.5080217.28611 NA
Q9Y279;Q9Y279-217.1679417.4455617.3634517.2928917.2204716.9875417.17998 NA17.1630416.5564115.0289316.5458916.5845016.5875517.6596316.6165616.70633 NA NA16.95138
Q9Y281;Q9Y281-3 NA NA NA NA NA NA NA NA NA NA NA12.30069 NA NA NA13.0103313.07666 NA NA12.54090
Q9Y28717.6628917.58819 NA17.6961417.7354117.0309416.9508318.26680 NA16.89611 NA NA16.5786617.9707417.1072818.2760816.4771616.5435416.1737817.82544
Q9Y2I2;Q9Y2I2-1;Q9Y2I2-2;Q9Y2I2-4;Q9Y2I2-5;Q9Y2I2-616.9277617.2207417.4259517.2986117.8281616.7418116.7206915.9049916.3961117.3693516.2202616.7063517.8930417.7065917.09718 NA16.9573018.3758717.5733317.93369
Q9Y2T3;Q9Y2T3-318.4676717.7211318.4401218.8656518.7322618.63877 NA17.1155719.3320318.0933816.5274618.1635417.6509217.5202617.4169318.6247718.2610718.5141918.0065017.90217
Q9Y490 NA13.96193 NA NA13.6269013.3812113.74195 NA NA13.37095 NA NA NA NA NA12.9323313.29442 NA13.8408613.52534
Q9Y4L118.5979518.47622 NA18.5596418.3046518.5237918.3602117.8747118.6725818.4313117.1867318.4140618.2671018.0855717.5979218.2899117.9772018.1487517.8805518.12515
Q9Y5F6;Q9Y5F6-216.4692915.78240 NA16.5289916.2848016.8634415.8995316.1095217.4013216.3436715.4592616.4288416.1595015.6315614.7356915.9682116.8848115.8777215.5542416.57524
Q9Y5I4;Q9Y5I4-217.1870117.4471717.4104317.5451417.2966717.3972817.1453216.4048217.3230817.4575315.8973117.13489 NA17.0858815.8738617.1035417.1090216.9384017.1550816.77629
Q9Y5Y718.8395719.1949519.0882018.7149718.6682419.0513319.0871118.8024518.7143918.7874517.1345217.8372718.7856218.6136718.7374718.7263918.4601119.50151 NA NA
Q9Y61716.8589716.7992716.2878217.07490 NA16.4995216.7026415.9239816.6377516.4427714.5826314.9398815.8697015.7117715.4650715.8083115.0352916.2830615.9196015.71292
Q9Y64619.3219619.1902919.7019419.7601319.6236919.2862819.6221919.0164419.1501219.5165219.8615620.2448419.2073120.3028119.4350719.8938820.0145720.3061420.2034220.04164
Q9Y653;Q9Y653-2;Q9Y653-316.0124315.5279415.2287115.4948414.7566815.0856315.78890 NA15.63408 NA NA14.9520115.5336115.0601415.66794 NA15.4222815.8079615.1574015.23671
Q9Y69615.1777815.5755314.7277014.59034 NA14.8599516.1363214.9492815.2656814.63326 NA15.8447915.8078416.1657415.9144815.6838716.1063316.0978016.7117615.65174
Q9Y6C2 NA NA13.7565814.68157 NA NA NA14.31805 NA NA NA NA NA NA14.2040714.23561 NA14.40272 NA15.21141
Q9Y6N615.0502114.8329015.1182815.1401815.2558814.69813 NA NA15.29271 NA14.7428215.1248215.2456815.2850215.0246115.4150415.3453615.7153214.6398314.20491
Q9Y6N7;Q9Y6N7-2;Q9Y6N7-416.8422116.5973917.4395217.3560817.0749517.0880316.71735 NA17.4531916.91617 NA17.0342516.7477916.39020 NA17.5509217.08447 NA16.5327616.74913
Q9Y6R7 NA20.2993319.5978519.4288019.5819819.1303318.6903018.9960519.9931818.5503419.2330418.8163618.9021218.81282 NA17.9218518.7081418.7247819.4111319.27470
Q9Y6X5 NA15.5555815.73522 NA15.32828 NA NA NA NA NA15.8946915.5618916.0887715.6691115.0119316.34016 NA16.1382715.8069015.73176
Q9Y6Y8;Q9Y6Y8-219.5628219.3857520.4473220.21596 NA19.6331920.0568618.6804420.0232319.94839 NA19.8094820.2043619.4182317.8474319.9276619.4326019.5991219.5450019.57720
Q9Y6Y9 NA13.9699712.63641 NA13.14453 NA12.8174112.89658 NA13.68470 NA NA12.7073812.9781212.2875012.92941 NA13.63705 NA11.04203
S4R3U612.8052112.4424512.5046612.44461 NA NA NA NA13.00793 NA NA10.13265 NA10.4976910.56335 NA NA11.17371 NA11.79130
  • data.frame does not allow arbitrary column names, but only valid column names…

  • tibbles don’t support rownames, and the imputation methods rely on normal data.frames. Save the header row for later use.

# Read a single data row with readr only to recover the column names exactly
# as they appear in the file; the first name is the feature (index) column.
original_header <- names(
  readr::read_csv(
    file = train_split,
    col_names = TRUE,
    skip = 0,
    n_max = 1
  )
)
feat_name <- original_header[1]
original_header[seq_len(5)]
Rows: 1 Columns: 211
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr   (1): protein groups
dbl (180): Sample_000, Sample_002, Sample_003, Sample_004, Sample_005, Sampl...
lgl  (30): Sample_001, Sample_009, Sample_012, Sample_015, Sample_017, Sampl...

 Use `spec()` to retrieve the full column specification for this data.
 Specify the column types or set `show_col_types = FALSE` to quiet this message.
  1. 'protein groups'
  2. 'Sample_000'
  3. 'Sample_001'
  4. 'Sample_002'
  5. 'Sample_003'

Uncomment to test certain methods (only for debugging, as at least one method per package is tested using Github Actions)

Hide code cell source

# to_test <- c(
# 'ZERO',
# 'MINIMUM',
# 'COLMEDIAN',
# 'ROWMEDIAN',
# 'KNN_IMPUTE',
# 'SEQKNN',
# 'BPCA',
# 'SVDMETHOD',
# 'LLS',
# 'MLE',
# 'LLS',
# 'QRILC',
# 'MINDET',
# 'MINPROB',
# 'IRM',
# 'IMPSEQ',
# 'IMPSEQROB',
# 'MICE-NORM',
# 'MICE-CART',
# 'RF',
# 'PI',
# 'GMS', # fails to install on Windows
# 'TRKNN',
# 'MSIMPUTE',
# 'MSIMPUTE_MNAR',
# 'GSIMP'
# )

# for (method in to_test) {
#     print(method)
#     pred <- nafunctions(df, method)
# }

Impute and save predictions with original feature and column names

# Impute, then move the row names into a leading column (tibbles carry no
# row names) and put the header read from the file back on.
pred <- nafunctions(x = df, method = method)
pred <- cbind(rownames(pred), pred)
pred <- tibble::as_tibble(pred)
names(pred) <- original_header
pred
Loading required package: e1071
A tibble: 1421 × 211
protein groupsSample_000Sample_001Sample_002Sample_003Sample_004Sample_005Sample_006Sample_007Sample_008Sample_200Sample_201Sample_202Sample_203Sample_204Sample_205Sample_206Sample_207Sample_208Sample_209
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
A0A024QZX5;A0A087X1N8;P35237 15.9123715.6254616.1114916.1069715.6032115.8116115.4996615.2207515.9801315.6254615.8626415.6559415.4012615.6254615.6822515.7980415.7394915.4768215.62546
A0A024R0T9;K7ER74;P02655 16.8519416.8736917.5520617.0315115.3305118.6140717.4091717.6839116.3862517.3095516.6148017.9533918.1993817.2791116.8863517.5540017.5520616.7794817.26137
A0A024R3W6;A0A024R412;O60462;O60462-2;O60462-3;O60462-4;O60462-5;Q7LBX6;X5D2Q815.5704715.5189215.9353215.8018715.3752215.6242015.9118515.3849915.8944715.4457915.9317914.8587115.1245115.4457914.9100615.5996615.4689614.9949615.17487
A0A024R644;A0A0A0MRU5;A0A1B0GWI2;O75503 16.4810816.3869416.4157716.9786216.6790615.9576416.2338916.4179316.2713415.9540216.0295916.4975316.5128416.5126916.4823215.9378216.8977316.1324616.23505
A0A075B6H7 17.3013816.7224618.1748015.9632216.7224618.3172016.7224617.2139317.7940316.7224616.4826214.8346713.5689314.4825716.7224616.7224616.7224616.7224616.72246
A0A075B6H9 20.2464619.9411019.2509019.6279720.4498318.6856920.3472220.7960418.9952319.3413119.3413118.2698619.8006220.1831517.7048518.1545018.6363114.9075417.89344
A0A075B6I0 16.7644618.7862416.8321717.8520918.6818517.8654318.2499520.3093617.9784318.0055018.5414217.8074819.8986519.6742017.0394518.1517917.9504018.5063517.74443
A0A075B6I1 17.5835717.1438815.6707318.8774917.0813114.8236918.5196919.3564018.5644916.4140217.5051014.0191117.5057420.2511117.5014016.5026716.3207317.5014016.37054
A0A075B6I6 16.9877816.2424817.0118314.1815314.1402117.0417917.3756016.9979417.2031115.7254317.0675816.4584916.3277816.3337216.4130416.8600316.4007116.1186815.77958
A0A075B6I9 20.0541619.0673318.5690618.9850619.6856419.3758019.3988520.5435918.8439721.8526320.1465019.5345919.8671320.7662619.1021118.5379618.8489718.3677618.80581
A0A075B6J9 16.6908016.1879916.6908013.4381314.4948415.9087516.1843616.6908016.1701916.6908016.6908016.6319317.4357419.7781116.0642316.6908017.5804416.6908016.53239
A0A075B6K4 16.1485016.1272715.3867216.5650216.4177116.4549816.7083817.0467716.0321018.5816218.5173716.4835416.0633817.1912315.3503316.5817316.7083816.7083816.33787
A0A075B6K5 17.3426117.4172217.2363316.2671717.3902018.3360417.2176518.3248817.5245819.5156618.9890617.8061416.7481918.4125417.1538417.9018417.5189917.5022016.98874
A0A075B6P5;P01615 19.1300019.1300019.1300016.9896517.4933018.2692317.7898918.1533417.7870319.9243419.1300019.1300019.8413619.2662017.1752819.5750619.3058818.9765118.68810
A0A075B6Q5 16.5030716.5030715.1283616.5030716.5030716.5030716.5030716.7394916.5030716.5030717.0259816.1028715.8797216.5030716.5030716.5030716.8835516.5030715.68019
A0A075B6R2 17.0899217.0899216.2798116.7774417.4974417.0539616.9177019.7722216.7370517.9982218.1213119.1918417.0899217.2858916.5977717.0704416.0645917.0899216.03648
A0A075B6R9;A0A0C4DH68 19.5164019.3728419.6582919.0092319.5414719.5671920.1850720.5645419.5232420.1274920.3736019.4998318.9773920.5883218.0733718.8467319.6205419.3392620.39256
A0A075B6S2 15.5112416.2082414.8457015.2324216.2070416.2865314.6563216.2070414.5121215.0742315.9130416.6596315.8011717.4537814.1015316.6121316.2198415.6958216.38320
A0A075B6S5 16.2031716.2031716.2031716.2031716.2031715.9601616.1527717.0784216.8465915.5131516.1492916.2031715.3513315.9116115.6517016.0342416.2031716.2031716.20317
A0A075B6S9 16.5576517.5159717.9399916.9514717.0879517.3609916.4210319.7908017.1339917.2595820.5021819.2650718.4246818.1972516.1456319.5037318.4250117.7519016.21995
A0A075B7B8 14.9159415.3073814.6586515.3073815.3488614.5011115.1219614.5114215.3121815.3073815.3073815.3073815.3073815.3073815.3073815.3073815.3073815.3073815.30738
A0A075B7D0 18.3575419.1774418.2301819.2610319.7863719.6555118.5159320.5000819.5057219.1774419.2306619.1774418.0771119.4352819.0449817.0216318.0826218.7376717.31493
A0A087WSV8;V9HW75 15.6758714.5049215.0636115.5530115.5208715.0636115.2165915.0636113.8527015.0636115.5257615.2093415.5381413.9836015.1491715.8904415.0636115.4187214.79854
A0A087WSY4 15.7321917.1499315.6505215.7321915.7321915.7321915.7321915.7321915.7321915.8503115.7321916.2078115.3381315.7321915.1274915.7321915.7321915.7321920.34338
A0A087WSY5;Q96IY4 17.8632318.0413017.7766818.3417818.4288817.8513117.6895319.1298418.0186817.8611717.1273117.2878418.3573418.0921317.1149617.1954117.9442618.0847817.34736
A0A087WSY6 20.1168620.2745019.2808019.7819019.7265820.7958019.6323920.5975919.9661021.2815519.5135919.2447419.7366319.3754319.5304719.7985518.6509218.4342219.15982
A0A087WSZ0;A0A0G2JQJ0 16.6846315.4764115.4764115.4764115.4764113.0377515.4764115.2586915.5957815.4764115.9783514.3112015.4764117.3274714.8897215.6365813.7425916.4765213.66795
A0A087WTA1;A0A0A0MR20;E9PDN6;F5H107;Q9C0A0;Q9C0A0-2 17.5937718.0124017.8292118.2685017.5570617.3805117.3108216.4085917.3750616.9424917.7465517.4929117.6991617.4929118.0800017.4805717.8153518.1862417.91903
A0A087WTA8;P08123 17.6190818.2168517.5444517.5022517.8028816.9918417.9773917.7099417.5321317.5621916.5727916.9006617.3504517.3504516.9340216.7485617.3504517.1775717.26857
A0A087WTK0;A0A087WVC6;Q12913 15.5012115.3069615.8147115.5515615.3213116.0441516.1886315.3213115.5648615.3213115.7602915.2191115.3213115.3213115.3213115.3939615.2466515.3213115.32131
Q9UN70;Q9UN70-2 16.1573415.7245516.4318216.2305215.9563915.7522416.4384315.6314615.7212615.7522415.6839115.4991315.9858515.0297915.4857115.5279816.0801215.6316515.77433
Q9UNN8 17.5605617.3015917.7646417.0385117.1159118.1904217.9768217.0015917.0997316.7719517.0385117.7796717.5903017.3782116.4571316.4507517.8933217.6361817.39555
Q9UNW1 16.9599716.5129016.1473416.1558016.5081815.6471416.4181416.4868116.3143816.1558016.2176816.1558016.0672616.1558016.3173416.2100416.5703716.1786816.53072
Q9UP79 15.1531015.5245516.0362115.4160915.5027716.3383315.8980214.5994015.5245515.5245515.5245515.0801515.5245515.2610415.7397715.5245515.5245516.1066415.52455
Q9UPU3 18.3254018.2302518.4128418.1004018.1773818.4628817.9007617.7512118.7432116.7844818.4704719.0300518.6485618.1108918.4219618.3693717.6405917.2421018.27864
Q9UQ52 15.3490115.5498915.7118715.6311815.6280915.5390215.4003114.5504515.0872415.8143715.6499415.6024615.6311815.0466615.7163716.1233015.6857115.8348516.18904
Q9UQM7 16.7798316.6355716.9280017.3646817.2613916.5729216.3412616.6902817.1852915.4493817.1391517.4659815.9544616.0165117.9082516.2232617.0897316.3884517.38407
Q9Y240 16.2570016.2715416.5778017.1333116.4537316.7610916.9081016.8798015.2639316.7483116.5384216.9056716.7234616.6139616.7144216.2557216.5080217.2861116.45373
Q9Y279;Q9Y279-2 17.1679417.4455617.3634517.2928917.2204716.9875417.1799816.6165617.1630415.0289316.5458916.5845016.5875517.6596316.6165616.7063316.6165616.6165616.95138
Q9Y281;Q9Y281-3 12.8043012.8043012.8043012.8043012.8043012.8043012.8043012.8043012.8043012.8043012.3006912.8043012.8043012.8043013.0103313.0766612.8043012.8043012.54090
Q9Y287 17.6628917.5881917.2205917.6961417.7354117.0309416.9508318.2668017.2205917.2205917.2205916.5786617.9707417.1072818.2760816.4771616.5435416.1737817.82544
Q9Y2I2;Q9Y2I2-1;Q9Y2I2-2;Q9Y2I2-4;Q9Y2I2-5;Q9Y2I2-616.9277617.2207417.4259517.2986117.8281616.7418116.7206915.9049916.3961116.2202616.7063517.8930417.7065917.0971816.9067716.9573018.3758717.5733317.93369
Q9Y2T3;Q9Y2T3-3 18.4676717.7211318.4401218.8656518.7322618.6387717.8758717.1155719.3320316.5274618.1635417.6509217.5202617.4169318.6247718.2610718.5141918.0065017.90217
Q9Y490 13.1570313.9619313.1570313.1570313.6269013.3812113.7419513.1570313.1570313.1570313.1570313.1570313.1570313.1570312.9323313.2944213.1570313.8408613.52534
Q9Y4L1 18.5979518.4762218.0449318.5596418.3046518.5237918.3602117.8747118.6725817.1867318.4140618.2671018.0855717.5979218.2899117.9772018.1487517.8805518.12515
Q9Y5F6;Q9Y5F6-2 16.4692915.7824016.2187116.5289916.2848016.8634415.8995316.1095217.4013215.4592616.4288416.1595015.6315614.7356915.9682116.8848115.8777215.5542416.57524
Q9Y5I4;Q9Y5I4-2 17.1870117.4471717.4104317.5451417.2966717.3972817.1453216.4048217.3230815.8973117.1348916.8159617.0858815.8738617.1035417.1090216.9384017.1550816.77629
Q9Y5Y7 18.8395719.1949519.0882018.7149718.6682419.0513319.0871118.8024518.7143917.1345217.8372718.7856218.6136718.7374718.7263918.4601119.5015118.3787018.37870
Q9Y617 16.8589716.7992716.2878217.0749015.6813116.4995216.7026415.9239816.6377514.5826314.9398815.8697015.7117715.4650715.8083115.0352916.2830615.9196015.71292
Q9Y646 19.3219619.1902919.7019419.7601319.6236919.2862819.6221919.0164419.1501219.8615620.2448419.2073120.3028119.4350719.8938820.0145720.3061420.2034220.04164
Q9Y653;Q9Y653-2;Q9Y653-3 16.0124315.5279415.2287115.4948414.7566815.0856315.7889015.2531315.6340815.2531314.9520115.5336115.0601415.6679415.2531315.4222815.8079615.1574015.23671
Q9Y696 15.1777815.5755314.7277014.5903415.5513614.8599516.1363214.9492815.2656815.5513615.8447915.8078416.1657415.9144815.6838716.1063316.0978016.7117615.65174
Q9Y6C2 14.2561214.2561213.7565814.6815714.2561214.2561214.2561214.3180514.2561214.2561214.2561214.2561214.2561214.2040714.2356114.2561214.4027214.2561215.21141
Q9Y6N6 15.0502114.8329015.1182815.1401815.2558814.6981315.0241515.0241515.2927114.7428215.1248215.2456815.2850215.0246115.4150415.3453615.7153214.6398314.20491
Q9Y6N7;Q9Y6N7-2;Q9Y6N7-4 16.8422116.5973917.4395217.3560817.0749517.0880316.7173516.8401317.4531916.8401317.0342516.7477916.3902016.8401317.5509217.0844716.8401316.5327616.74913
Q9Y6R7 19.2538020.2993319.5978519.4288019.5819819.1303318.6903018.9960519.9931819.2330418.8163618.9021218.8128219.2538017.9218518.7081418.7247819.4111319.27470
Q9Y6X5 15.3282815.5555815.7352215.3282815.3282815.3282815.3282815.3282815.3282815.8946915.5618916.0887715.6691115.0119316.3401615.3282816.1382715.8069015.73176
Q9Y6Y8;Q9Y6Y8-2 19.5628219.3857520.4473220.2159619.4904419.6331920.0568618.6804420.0232319.4904419.8094820.2043619.4182317.8474319.9276619.4326019.5991219.5450019.57720
Q9Y6Y9 12.4082713.9699712.6364112.4082713.1445312.4082712.8174112.8965812.4082712.4082712.4082712.7073812.9781212.2875012.9294112.4082713.6370512.4082711.04203
S4R3U6 12.8052112.4424512.5046612.4446111.4044211.4044211.4044211.4044213.0079311.4044210.1326511.4044210.4976910.5633511.4044211.4044211.1737111.4044211.79130

Transform predictions to long format

# Reshape the wide prediction table into long format, keeping the feature
# identifier column (`feat_name`) as the id variable.
pred <- reshape2::melt(pred, id.vars = feat_name)
colnames(pred) <- c(feat_name, 'Sample ID', method)
# Logical mask of entries that are NA in `df`, melted in the same way.
# NOTE(review): assumes the melted mask lines up row-by-row with the melted
# predictions -- confirm `df` has the matching layout.
was_missing <- reshape2::melt(is.na(df))[['value']]
# Keep only the predictions for entries that were missing in `df`.
pred <- pred[was_missing, ]
pred
A data.frame: 71601 × 3
protein groupsSample IDROWMEDIAN
<chr><fct><dbl>
11A0A075B6J9 Sample_00016.69080
14A0A075B6P5;P01615 Sample_00019.13000
15A0A075B6Q5 Sample_00016.50307
16A0A075B6R2 Sample_00017.08992
19A0A075B6S5 Sample_00016.20317
24A0A087WSY4 Sample_00015.73219
34A0A087WU43;A0A087WX17;A0A087WXI5;P12830;P12830-2 Sample_00015.74374
39A0A087WW87;A0A087X0Q4;P01614 Sample_00013.93263
40A0A087WWA5 Sample_00014.13405
42A0A087WWT2;Q9NPD7 Sample_00017.51238
43A0A087WX80;P24043 Sample_00016.04166
45A0A087WXE9;E9PQ70;Q6UXH9;Q6UXH9-2;Q6UXH9-3 Sample_00014.47215
48A0A087WYK9;Q02985;Q02985-2;Q6NSD3 Sample_00016.06865
52A0A087WZR4 Sample_00015.56904
53A0A087X089;Q16627;Q16627-2 Sample_00017.32283
56A0A087X0M8 Sample_00016.07503
59A0A087X117;A0A0G2JN29;J3KN36;P69849;Q15155;Q5JPE7;Q5JPE7-2 Sample_00015.46053
69A0A0A0MQS9;A0A0A0MTC7;Q16363;Q16363-2 Sample_00014.17913
78A0A0A0MS20;A0A0A0MSZ8;A0A0G2JM38;A0A0G2JM43;A0A0G2JM57;A0A0G2JM84;A0A0G2JMH7;A0A0G2JML1;A0A0G2JNE9;A0A0G2JNL1;A0A0G2JP25;A0A0G2JP84;A0A0G2JPA9;A0A0G2JPC7;A0A0G2JPU4;A0A0G2JPX5;A0A0G2JQ10;A0A0G2JQ20;A8MUE1;C9JST2;Q8NHJ6;Q8NHJ6-2;Q8NHJ6-3Sample_00013.81378
85A0A0A0MT32;P38571;P38571-2 Sample_00016.10676
87A0A0A0MT66 Sample_00015.91359
90A0A0A0MTC8;Q9NQ36;Q9NQ36-2;Q9NQ36-3 Sample_00014.33707
96A0A0A6YY99;O43508-2 Sample_00014.30411
101A0A0B4J1V2 Sample_00014.55081
102A0A0B4J1V6 Sample_00015.50393
105A0A0B4J1Z1;C9JAB2;Q16629;Q16629-2;Q16629-3;Q16629-4 Sample_00016.15114
108A0A0B4J2B5;S4R460 Sample_00022.12728
109A0A0B4J2C3;P13693;Q5W0H4 Sample_00013.33437
111A0A0C4DFP6;Q9NQ79;Q9NQ79-2 Sample_00020.56591
113A0A0C4DG76;Q5JZ08;Q9BQS7;Q9BQS7-2;Q9BQS7-3;Q9BQS7-4 Sample_00016.41722
298242Q96PQ0 Sample_20917.63778
298244Q96RW7;Q96RW7-2 Sample_20914.41661
298246Q96SM3 Sample_20915.85224
298249Q99538 Sample_20916.54436
298252Q99650;Q99650-2 Sample_20913.81712
298261Q9BQT9;Q9BQT9-2 Sample_20919.42851
298266Q9BT88 Sample_20914.93538
298273Q9BX67 Sample_20918.29317
298277Q9BXP8 Sample_20914.53605
298287Q9H3T2;Q9H3T2-3 Sample_20913.85970
298288Q9H3T3;Q9H3T3-3 Sample_20915.66022
298289Q9H492;Q9H492-2 Sample_20913.14098
298297Q9H8L6 Sample_20917.32389
298299Q9HBR0 Sample_20914.81192
298302Q9HC38-2 Sample_20913.80603
298303Q9HC57 Sample_20914.70747
298310Q9NPH3;Q9NPH3-2;Q9NPH3-5Sample_20915.16572
298314Q9NRB3 Sample_20914.51592
298316Q9NS00;Q9NS00-2 Sample_20913.37597
298317Q9NS84 Sample_20912.37373
298328Q9NY97;Q9NY97-2 Sample_20917.09677
298341Q9P1W8;Q9P1W8-2;Q9P1W8-4Sample_20916.01901
298349Q9UGM5;Q9UGM5-2 Sample_20916.70252
298355Q9UI40;Q9UI40-2 Sample_20916.74440
298357Q9UIW2 Sample_20915.05718
298358Q9UJ14 Sample_20914.13586
298377Q9UMX0;Q9UMX0-2;Q9UMX0-4Sample_20913.73904
298384Q9UP79 Sample_20915.52455
298388Q9Y240 Sample_20916.45373
298398Q9Y5Y7 Sample_20918.37870

Check the dimensions of the long-format data frame

Hide code cell source

# Dimensions (rows, columns) of the long-format prediction table `pred`
dim(pred)
  1. 71601
  2. 3

Save predictions to disk

Hide code cell source

# Build the output path: <folder_experiment>/preds/pred_all_<METHOD>.csv
fname <- file.path(folder_experiment,
                   'preds',
                   paste0('pred_all_', toupper(method), '.csv'))
# Write the long-format predictions to disk. The target is passed via `file`
# because the `path` argument of write_csv() is deprecated as of readr 1.4.0.
write_csv(pred, file = fname)
# Show the path that was written
fname
Warning message:
“The `path` argument of `write_csv()` is deprecated as of readr 1.4.0.
 Please use the `file` argument instead.”
'runs/alzheimer_study/preds/pred_all_ROWMEDIAN.csv'