NAGuide R methods

NAGuide R methods#

Set up the basic helper functions and packages used by all methods

  • BiocManager could be moved to the methods which are installed from Bioconductor

Hide code cell source

# options("install.lock"=FALSE)

# Packages that are attached up front, before any imputation method runs.
packages_base_R <- c(
  "BiocManager",
  "reshape2",
  "data.table",
  "readr",
  "tibble"
)

install_rpackage <- function(pkg) {
  # Nothing to do when the package can already be attached.
  if (require(pkg, character.only = TRUE)) {
    return(invisible(NULL))
  }
  # Otherwise install it from the configured repositories and attach it.
  install.packages(pkg)
  library(pkg, character.only = TRUE)
}

# used in the large imputation function for two packages
install_bioconductor <- function(pkg) {
  # Nothing to do when the package can already be attached.
  if (require(pkg, character.only = TRUE)) {
    return(invisible(NULL))
  }
  # Otherwise install it through BiocManager and attach it.
  BiocManager::install(pkg)
  library(pkg, character.only = TRUE)
}


# Attach every base package, installing the ones that are still missing.
for (package in packages_base_R) {
  install_rpackage(package)
}
Loading required package: BiocManager
Loading required package: reshape2
Loading required package: data.table

Attaching package: ‘data.table’
The following objects are masked from ‘package:reshape2’:

    dcast, melt
Loading required package: readr
Loading required package: tibble

Setup can be tricky… we try to integrate as much as possible into the conda environment.

Copied from NAGuideR’s GitHub RShiny application. Adapted to run as a standalone function in the context of the Snakemake workflow.

  • df and df1 ?

  • seems quite hacky

  • code is only slightly adapted from repo to run here, mainly to install packages on the fly

Hide code cell source

#' Impute missing values with one of the NAGuideR methods.
#'
#' Packages required by the selected method are installed on the fly through
#' `install_rpackage()` / `install_bioconductor()`. Some methods additionally
#' read local files below `src/R_NAGuideR/` (package tarballs or R scripts),
#' so they only work when called from a directory that contains that folder.
#'
#' @param x Table with missing values; coerced with `as.data.frame()`.
#'   The comments of the "gsimp" branch describe the layout as features in
#'   rows and samples in columns.
#' @param method Single, case-insensitive method name. One of "zero",
#'   "minimum", "colmedian", "rowmedian", "knn_impute", "seqknn", "bpca",
#'   "svdmethod", "lls", "mle", "qrilc", "mindet", "minprob", "irm",
#'   "impseq", "impseqrob", "mice-norm", "mice-cart", "trknn", "rf", "pi",
#'   "gms", "msimpute", "msimpute_mnar", "gsimp".
#' @return A data.frame holding the imputed table.
nafunctions <- function(x, method = "zero") {
  stopifnot(length(method) == 1L, !is.na(method))
  # `df1` keeps the untouched input, `df` receives the imputed result.
  df <- df1 <- as.data.frame(x)
  method <- tolower(method)

  # Install sequence shared by every branch that relies on imputeLCMD.
  install_imputelcmd <- function() {
    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage("gmm")
    install_rpackage("imputeLCMD")
  }

  # Run mice with five imputations and return the cell-wise average of the
  # completed tables.
  impute_mice_mean <- function(mice_method) {
    install_rpackage("mice")
    n_imputations <- 5
    fitted <- mice::mice(df1,
                         m = n_imputations,
                         seed = 1234,
                         method = mice_method)
    total <- 0
    for (i in seq_len(n_imputations)) {
      total <- mice::complete(fitted, action = i) + total
    }
    averaged <- total / n_imputations
    rownames(averaged) <- rownames(df1)
    averaged
  }

  # Disabled method, kept for reference:
  # else if(method=="grr"){
  #   library(DreamAI)
  #   df<-impute.RegImpute(data=as.matrix(df1), fillmethod = "row_mean", maxiter_RegImpute = 10,conv_nrmse = 1e-03)
  # }

  if (method == "zero") {
    df[is.na(df)] <- 0
  } else if (method == "minimum") {
    # Smallest observed value of the whole table.
    df[is.na(df)] <- min(df1, na.rm = TRUE)
  } else if (method == "colmedian") {
    install_rpackage("e1071")
    df <- e1071::impute(df1, what = "median")
  } else if (method == "rowmedian") {
    install_rpackage("e1071")
    # Transpose so that the column-wise median becomes a row-wise one.
    df <- t(e1071::impute(t(df1), what = "median"))
  } else if (method == "knn_impute") {
    install_bioconductor("impute")
    # Alternative noted in the original: rowmax = 0.9, colmax = 0.9
    data_zero1 <- impute::impute.knn(as.matrix(df1),
                                     k = 10,
                                     rowmax = 1,
                                     colmax = 1)
    df <- data_zero1$data
  } else if (method == "seqknn") {
    # SeqKnn is installed from the tarball shipped with the workflow.
    if (!require(SeqKnn)) {
      install.packages("src/R_NAGuideR/SeqKnn_1.0.1.tar.gz",
                       repos = NULL,
                       type = "source")
      library(SeqKnn)
    }
    df <- SeqKnn::SeqKNN(df1, k = 10)
  } else if (method == "bpca") {
    install_bioconductor("pcaMethods")
    data_zero1 <- pcaMethods::pca(
      as.matrix(df1),
      nPcs = ncol(df1) - 1,
      method = "bpca",
      maxSteps = 100
    )
    df <- pcaMethods::completeObs(data_zero1)
  } else if (method == "svdmethod") {
    install_bioconductor("pcaMethods")
    data_zero1 <- pcaMethods::pca(as.matrix(df1),
                                  nPcs = ncol(df1) - 1,
                                  method = "svdImpute")
    df <- pcaMethods::completeObs(data_zero1)
  } else if (method == "lls") {
    install_bioconductor("pcaMethods")
    data_zero1 <- pcaMethods::llsImpute(t(df1), k = 10)
    df <- t(pcaMethods::completeObs(data_zero1))
  } else if (method == "mle") {
    install_rpackage("norm")
    xxm <- as.matrix(df1)
    ss <- norm::prelim.norm(xxm)
    thx <- norm::em.norm(ss)
    # Fixed seed for the random draws of imp.norm.
    norm::rngseed(123)
    df <- norm::imp.norm(ss, thx, xxm)
  } else if (method == "qrilc") {
    install_imputelcmd()
    data_zero1 <- imputeLCMD::impute.QRILC(t(df1), tune.sigma = 1)[[1]]
    df <- t(data_zero1)
  } else if (method == "mindet") {
    install_imputelcmd()
    df <- imputeLCMD::impute.MinDet(as.matrix(df1), q = 0.01)
  } else if (method == "minprob") {
    install_imputelcmd()
    df <- imputeLCMD::impute.MinProb(as.matrix(df1),
                                     q = 0.01,
                                     tune.sigma = 1)
  } else if (method == "irm") {
    install_rpackage("VIM")
    df <- VIM::irmi(df1, trace = TRUE, imp_var = FALSE)
    rownames(df) <- rownames(df1)
  } else if (method == "impseq") {
    install_rpackage("rrcovNA")
    df <- rrcovNA::impSeq(df1)
  } else if (method == "impseqrob") {
    install_rpackage("rrcovNA")
    data_zero1 <- rrcovNA::impSeqRob(df1, alpha = 0.9)
    df <- data_zero1$x
  } else if (method == "mice-norm") {
    df <- impute_mice_mean("norm")
  } else if (method == "mice-cart") {
    df <- impute_mice_mean("cart")
  } else if (method == "trknn") {
    # Provides imputeKNN(); sourced from the workflow's R folder.
    source("src/R_NAGuideR/Imput_funcs.r")
    # sim_trKNN_wrapper <- function(data) {
    #   result <- data %>% as.matrix %>% t %>% imputeKNN(., k=10, distance='truncation', perc=0) %>% t
    #   return(result)
    # }
    # df1x <- sim_trKNN_wrapper(t(df1))
    # df<-as.data.frame(t(df1x))
    df <- imputeKNN(as.matrix(df1),
                    k = 10,
                    distance = "truncation",
                    perc = 0)
    df <- as.data.frame(df)
  } else if (method == "rf") {
    install_rpackage("missForest")
    data_zero1 <- missForest::missForest(
      t(df1),
      maxiter = 10,
      ntree = 20, # input$rfntrees
      mtry = floor(nrow(df1) ^ (1 / 3)),
      verbose = TRUE
    )
    df <- t(data_zero1$ximp)
  } else if (method == "pi") {
    width <- 0.3 # input$piwidth
    downshift <- 1.8 # input$pidownshift
    # Column by column: draw replacements from a normal distribution whose
    # mean is shifted down by `downshift` observed standard deviations and
    # whose spread is `width` times the observed standard deviation.
    for (i in seq_len(ncol(df1))) {
      temp <- df1[[i]]
      is_missing <- is.na(temp)
      if (any(is_missing)) {
        observed_sd <- sd(temp[!is_missing])
        observed_mean <- mean(temp[!is_missing])
        temp[is_missing] <- rnorm(
          sum(is_missing),
          mean = observed_mean - downshift * observed_sd,
          sd = width * observed_sd
        )
        df[[i]] <- temp
      }
    }
  } else if (method == "gms") {
    # install.packages('GMSimpute')
    # GMSimpute is installed from the tarball shipped with the workflow.
    if (!require(GMSimpute)) {
      install.packages(
        "src/R_NAGuideR/GMSimpute_0.0.1.1.tar.gz",
        repos = NULL,
        type = "source"
      )
      library(GMSimpute)
    }
    df <- GMSimpute::GMS.Lasso(df1,
                               nfolds = 3,
                               log.scale = FALSE,
                               TS.Lasso = TRUE)
  } else if (method == "msimpute") {
    install_bioconductor("msImpute")
    df <- msImpute::msImpute(as.matrix(df1), method = "v2")
    df <- as.data.frame(df)
  } else if (method == "msimpute_mnar") {
    install_bioconductor("msImpute")
    # All columns are assigned to a single group.
    df <- msImpute::msImpute(as.matrix(df1),
                             method = "v2-mnar",
                             group = rep(1, ncol(df1)))
    df <- as.data.frame(df)
  } else if (method == "gsimp") {
    # Restore the caller's option on exit instead of leaking the change.
    old_options <- options(stringsAsFactors = FALSE)
    on.exit(options(old_options), add = TRUE)

    # Dependencies, partly for the sourced file.
    install_imputelcmd()
    install_rpackage("magrittr")
    install_rpackage("glmnet")
    install_rpackage("abind")
    install_rpackage("foreach")
    install_rpackage("doParallel")
    source("src/R_NAGuideR/GSimp.R")

    # Wrapper function with data pre-processing.
    pre_processing_GS_wrapper <- function(data_raw_log) {
      # samples in rows, features in columns #
      # Initialization #
      data_raw_log_qrilc <- as.data.frame(data_raw_log) %>%
        impute.QRILC() %>% extract2(1)
      # Centralization and scaling #
      data_raw_log_qrilc_sc <-
        scale_recover(data_raw_log_qrilc, method = "scale")
      # Data after centralization and scaling #
      data_raw_log_qrilc_sc_df <- data_raw_log_qrilc_sc[[1]]
      # Parameters for centralization and scaling (for scaling recovery) #
      data_raw_log_qrilc_sc_df_param <- data_raw_log_qrilc_sc[[2]]
      # NA position #
      NA_pos <- which(is.na(data_raw_log), arr.ind = TRUE)
      # NA introduced to log-scaled-initialized data #
      data_raw_log_sc <- data_raw_log_qrilc_sc_df
      data_raw_log_sc[NA_pos] <- NA
      # Feed initialized and missing data into GSimp imputation #
      result <-
        data_raw_log_sc %>% GS_impute(
          .,
          iters_each = 50,
          iters_all = 10,
          initial = data_raw_log_qrilc_sc_df,
          lo = -Inf,
          hi = "min",
          n_cores = 1,
          imp_model = "glmnet_pred"
        )
      data_imp_log_sc <- result$data_imp
      # Data recovery #
      data_imp <- data_imp_log_sc %>%
        scale_recover(., method = "recover",
                      param_df = data_raw_log_qrilc_sc_df_param) %>%
        extract2(1)
      return(data_imp)
    }
    df <- t(df1) # samples in rows, features in columns
    df <- pre_processing_GS_wrapper(df)
    df <- t(df) # features in rows, samples in columns
  } else {
    stop(paste("Unsupported methods so far: ", method))
  }
  # Several branches produce a matrix; always hand back a data.frame.
  as.data.frame(df)
}

Parameters#

Choose one of the available methods. Some methods might fail for your dataset for unknown reasons (and the error won’t always be easy to understand)

# Available methods. Every assignment overrides the previous one, so only
# the last value assigned to `method` is used further down.
method = 'ZERO'
method = 'MINIMUM'
method = 'COLMEDIAN'
method = 'ROWMEDIAN'
method = 'KNN_IMPUTE'
method = 'SEQKNN'
method = 'BPCA'
method = 'SVDMETHOD'
method = 'LLS'
method = 'MLE'
method = 'QRILC'
method = 'MINDET'
method = 'MINPROB'
method = 'IRM'
method = 'IMPSEQ'
method = 'IMPSEQROB'
method = 'MICE-NORM'
method = 'MICE-CART'
method = 'RF'
method = 'PI'
method = 'GMS'
method = 'TRKNN'
method = 'MSIMPUTE'
method = 'MSIMPUTE_MNAR'
method = 'GSIMP'
# Defaults for the example run.
train_split = 'runs/example/data/data_wide_sample_cols.csv' # test
folder_experiment = 'runs/example/'
method = 'KNN_IMPUTE'
# Parameters
train_split = "runs/alzheimer_study/data/data_wide_sample_cols.csv"
method = "PI"
folder_experiment = "runs/alzheimer_study"

Dump predictions#

# Load the wide table; the first CSV column supplies the row names.
df <- utils::read.csv(
  file = train_split,
  header = TRUE,
  row.names = 1,
  stringsAsFactors = FALSE
)
df
A data.frame: 1421 × 210
Sample_000Sample_001Sample_002Sample_003Sample_004Sample_005Sample_006Sample_007Sample_008Sample_009Sample_200Sample_201Sample_202Sample_203Sample_204Sample_205Sample_206Sample_207Sample_208Sample_209
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
A0A024QZX5;A0A087X1N8;P3523715.91237 NA16.1114916.1069715.6032115.8116115.4996615.2207515.98013 NA NA15.8626415.6559415.40126 NA15.6822515.7980415.7394915.47682 NA
A0A024R0T9;K7ER74;P0265516.8519416.87369 NA17.0315115.3305118.6140717.4091717.6839116.3862516.5897217.3095516.6148017.9533918.1993817.2791116.8863517.55400 NA16.7794817.26137
A0A024R3W6;A0A024R412;O60462;O60462-2;O60462-3;O60462-4;O60462-5;Q7LBX6;X5D2Q815.5704715.5189215.9353215.8018715.3752215.6242015.9118515.3849915.8944715.37538 NA15.9317914.8587115.12451 NA14.9100615.5996615.4689614.9949615.17487
A0A024R644;A0A0A0MRU5;A0A1B0GWI2;O7550316.4810816.3869416.4157716.9786216.6790615.9576416.2338916.4179316.2713416.2097015.9540216.0295916.4975316.5128416.5126916.4823215.9378216.8977316.1324616.23505
A0A075B6H717.30138 NA18.1748015.96322 NA18.31720 NA17.2139317.79403 NA NA16.4826214.8346713.5689314.48257 NA NA NA NA NA
A0A075B6H920.2464619.9411019.2509019.6279720.4498318.6856920.3472220.7960418.9952319.24667 NA NA18.2698619.8006220.1831517.7048518.1545018.6363114.9075417.89344
A0A075B6I016.7644618.7862416.8321717.8520918.6818517.8654318.2499520.3093617.9784319.4104718.0055018.5414217.8074819.8986519.6742017.0394518.1517917.95040 NA17.74443
A0A075B6I117.5835717.1438815.6707318.8774917.0813114.8236918.5196919.3564018.5644917.9631016.4140217.5051014.0191117.5057420.25111 NA16.5026716.32073 NA16.37054
A0A075B6I616.98778 NA17.0118314.1815314.1402117.0417917.3756016.9979417.2031116.7583915.7254317.0675816.4584916.3277816.3337216.4130416.8600316.4007116.1186815.77958
A0A075B6I920.0541619.0673318.5690618.9850619.6856419.3758019.3988520.5435918.8439718.8997521.8526320.1465019.5345919.8671320.7662619.1021118.5379618.8489718.3677618.80581
A0A075B6J9 NA16.18799 NA13.4381314.4948415.9087516.18436 NA16.1701916.32362 NA NA16.6319317.4357419.7781116.06423 NA17.58044 NA16.53239
A0A075B6K416.1485016.1272715.3867216.5650216.4177116.45498 NA17.0467716.0321016.1490818.5816218.5173716.4835416.0633817.1912315.3503316.58173 NA NA16.33787
A0A075B6K517.3426117.4172217.2363316.2671717.3902018.3360417.2176518.3248817.5245818.7760819.5156618.9890617.8061416.7481918.4125417.1538417.9018417.5189917.5022016.98874
A0A075B6P5;P01615 NA NA NA16.9896517.4933018.2692317.7898918.1533417.7870318.3521319.92434 NA NA19.8413619.2662017.1752819.5750619.3058818.9765118.68810
A0A075B6Q5 NA NA15.12836 NA NA NA NA16.73949 NA NA NA17.0259816.1028715.87972 NA NA NA16.88355 NA15.68019
A0A075B6R2 NA NA16.2798116.7774417.4974417.0539616.9177019.7722216.7370517.9567017.9982218.1213119.19184 NA17.2858916.5977717.0704416.06459 NA16.03648
A0A075B6R9;A0A0C4DH6819.5164019.37284 NA19.0092319.5414719.5671920.1850720.5645419.5232419.3871620.1274920.3736019.4998318.9773920.5883218.0733718.8467319.6205419.3392620.39256
A0A075B6S215.5112416.2082414.8457015.23242 NA16.2865314.65632 NA14.5121216.5692515.0742315.9130416.6596315.8011717.4537814.1015316.6121316.2198415.6958216.38320
A0A075B6S5 NA NA NA NA NA15.9601616.1527717.0784216.8465916.3338515.5131516.14929 NA15.3513315.9116115.6517016.03424 NA NA NA
A0A075B6S916.5576517.5159717.9399916.9514717.0879517.3609916.4210319.7908017.1339915.4731717.2595820.5021819.2650718.4246818.1972516.1456319.5037318.4250117.7519016.21995
A0A075B7B814.91594 NA14.65865 NA15.3488614.5011115.1219614.5114215.31218 NA NA NA NA NA NA NA NA NA NA NA
A0A075B7D018.35754 NA18.2301819.2610319.7863719.6555118.5159320.5000819.5057220.00935 NA19.23066 NA18.0771119.4352819.0449817.0216318.0826218.7376717.31493
A0A087WSV8;V9HW7515.6758714.50492 NA15.5530115.52087 NA15.21659 NA13.85270 NA NA15.5257615.2093415.5381413.9836015.1491715.89044 NA15.4187214.79854
A0A087WSY4 NA17.1499315.65052 NA NA NA NA NA NA NA15.85031 NA16.2078115.33813 NA15.12749 NA NA NA20.34338
A0A087WSY5;Q96IY417.8632318.0413017.7766818.3417818.4288817.8513117.6895319.1298418.0186818.2319617.8611717.1273117.2878418.3573418.0921317.1149617.1954117.9442618.0847817.34736
A0A087WSY620.1168620.2745019.2808019.7819019.7265820.7958019.6323920.5975919.9661018.7243921.2815519.5135919.2447419.7366319.37543 NA19.7985518.6509218.4342219.15982
A0A087WSZ0;A0A0G2JQJ016.68463 NA NA NA NA13.03775 NA15.2586915.5957813.63975 NA15.9783514.31120 NA17.3274714.8897215.6365813.7425916.4765213.66795
A0A087WTA1;A0A0A0MR20;E9PDN6;F5H107;Q9C0A0;Q9C0A0-217.5937718.0124017.8292118.2685017.5570617.3805117.3108216.4085917.3750618.0290816.9424917.74655 NA17.69916 NA18.0800017.4805717.8153518.1862417.91903
A0A087WTA8;P0812317.6190818.2168517.5444517.5022517.8028816.9918417.9773917.7099417.5321317.3504517.5621916.5727916.90066 NA NA16.9340216.74856 NA17.1775717.26857
A0A087WTK0;A0A087WVC6;Q1291315.5012115.3069615.8147115.5515615.3213116.0441516.18863 NA15.5648615.37830 NA15.7602915.21911 NA NA NA15.3939615.24665 NA NA
Q9UN70;Q9UN70-216.1573415.7245516.4318216.2305215.95639 NA16.4384315.6314615.7212615.74455 NA15.6839115.4991315.9858515.0297915.4857115.5279816.0801215.6316515.77433
Q9UNN817.5605617.3015917.76464 NA17.1159118.1904217.9768217.0015917.09973 NA16.77195 NA17.7796717.5903017.3782116.4571316.4507517.8933217.6361817.39555
Q9UNW116.9599716.5129016.14734 NA16.5081815.6471416.4181416.4868116.3143817.10669 NA16.21768 NA16.06726 NA16.3173416.2100416.5703716.1786816.53072
Q9UP7915.15310 NA16.0362115.4160915.5027716.3383315.8980214.59940 NA15.94205 NA NA15.08015 NA15.2610415.73977 NA NA16.10664 NA
Q9UPU318.3254018.23025 NA18.1004018.1773818.4628817.9007617.7512118.7432117.7462416.7844818.4704719.0300518.6485618.1108918.4219618.3693717.6405917.2421018.27864
Q9UQ5215.3490115.5498915.71187 NA15.6280915.5390215.4003114.5504515.0872415.2123315.8143715.6499415.60246 NA15.0466615.7163716.1233015.6857115.8348516.18904
Q9UQM716.7798316.6355716.9280017.3646817.2613916.5729216.34126 NA17.18529 NA15.4493817.1391517.4659815.9544616.0165117.9082516.2232617.0897316.3884517.38407
Q9Y24016.2570016.2715416.5778017.13331 NA16.7610916.9081016.8798015.2639316.6680716.7483116.5384216.9056716.7234616.6139616.7144216.2557216.5080217.28611 NA
Q9Y279;Q9Y279-217.1679417.4455617.3634517.2928917.2204716.9875417.17998 NA17.1630416.5564115.0289316.5458916.5845016.5875517.6596316.6165616.70633 NA NA16.95138
Q9Y281;Q9Y281-3 NA NA NA NA NA NA NA NA NA NA NA12.30069 NA NA NA13.0103313.07666 NA NA12.54090
Q9Y28717.6628917.58819 NA17.6961417.7354117.0309416.9508318.26680 NA16.89611 NA NA16.5786617.9707417.1072818.2760816.4771616.5435416.1737817.82544
Q9Y2I2;Q9Y2I2-1;Q9Y2I2-2;Q9Y2I2-4;Q9Y2I2-5;Q9Y2I2-616.9277617.2207417.4259517.2986117.8281616.7418116.7206915.9049916.3961117.3693516.2202616.7063517.8930417.7065917.09718 NA16.9573018.3758717.5733317.93369
Q9Y2T3;Q9Y2T3-318.4676717.7211318.4401218.8656518.7322618.63877 NA17.1155719.3320318.0933816.5274618.1635417.6509217.5202617.4169318.6247718.2610718.5141918.0065017.90217
Q9Y490 NA13.96193 NA NA13.6269013.3812113.74195 NA NA13.37095 NA NA NA NA NA12.9323313.29442 NA13.8408613.52534
Q9Y4L118.5979518.47622 NA18.5596418.3046518.5237918.3602117.8747118.6725818.4313117.1867318.4140618.2671018.0855717.5979218.2899117.9772018.1487517.8805518.12515
Q9Y5F6;Q9Y5F6-216.4692915.78240 NA16.5289916.2848016.8634415.8995316.1095217.4013216.3436715.4592616.4288416.1595015.6315614.7356915.9682116.8848115.8777215.5542416.57524
Q9Y5I4;Q9Y5I4-217.1870117.4471717.4104317.5451417.2966717.3972817.1453216.4048217.3230817.4575315.8973117.13489 NA17.0858815.8738617.1035417.1090216.9384017.1550816.77629
Q9Y5Y718.8395719.1949519.0882018.7149718.6682419.0513319.0871118.8024518.7143918.7874517.1345217.8372718.7856218.6136718.7374718.7263918.4601119.50151 NA NA
Q9Y61716.8589716.7992716.2878217.07490 NA16.4995216.7026415.9239816.6377516.4427714.5826314.9398815.8697015.7117715.4650715.8083115.0352916.2830615.9196015.71292
Q9Y64619.3219619.1902919.7019419.7601319.6236919.2862819.6221919.0164419.1501219.5165219.8615620.2448419.2073120.3028119.4350719.8938820.0145720.3061420.2034220.04164
Q9Y653;Q9Y653-2;Q9Y653-316.0124315.5279415.2287115.4948414.7566815.0856315.78890 NA15.63408 NA NA14.9520115.5336115.0601415.66794 NA15.4222815.8079615.1574015.23671
Q9Y69615.1777815.5755314.7277014.59034 NA14.8599516.1363214.9492815.2656814.63326 NA15.8447915.8078416.1657415.9144815.6838716.1063316.0978016.7117615.65174
Q9Y6C2 NA NA13.7565814.68157 NA NA NA14.31805 NA NA NA NA NA NA14.2040714.23561 NA14.40272 NA15.21141
Q9Y6N615.0502114.8329015.1182815.1401815.2558814.69813 NA NA15.29271 NA14.7428215.1248215.2456815.2850215.0246115.4150415.3453615.7153214.6398314.20491
Q9Y6N7;Q9Y6N7-2;Q9Y6N7-416.8422116.5973917.4395217.3560817.0749517.0880316.71735 NA17.4531916.91617 NA17.0342516.7477916.39020 NA17.5509217.08447 NA16.5327616.74913
Q9Y6R7 NA20.2993319.5978519.4288019.5819819.1303318.6903018.9960519.9931818.5503419.2330418.8163618.9021218.81282 NA17.9218518.7081418.7247819.4111319.27470
Q9Y6X5 NA15.5555815.73522 NA15.32828 NA NA NA NA NA15.8946915.5618916.0887715.6691115.0119316.34016 NA16.1382715.8069015.73176
Q9Y6Y8;Q9Y6Y8-219.5628219.3857520.4473220.21596 NA19.6331920.0568618.6804420.0232319.94839 NA19.8094820.2043619.4182317.8474319.9276619.4326019.5991219.5450019.57720
Q9Y6Y9 NA13.9699712.63641 NA13.14453 NA12.8174112.89658 NA13.68470 NA NA12.7073812.9781212.2875012.92941 NA13.63705 NA11.04203
S4R3U612.8052112.4424512.5046612.44461 NA NA NA NA13.00793 NA NA10.13265 NA10.4976910.56335 NA NA11.17371 NA11.79130
  • data.frame does not allow arbitrary column names, but only valid column names…

  • tibbles don’t support rownames, and the imputation methods rely on normal data.frames. Save the header row for later use.

# Read a single data row only; just the untouched column names are needed.
original_header <- names(
  readr::read_csv(train_split, col_names = TRUE, skip = 0, n_max = 1)
)
# The first column holds the feature identifiers.
feat_name <- original_header[1]
original_header[seq_len(5)]
Rows: 1 Columns: 211
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr   (1): protein groups
dbl (180): Sample_000, Sample_002, Sample_003, Sample_004, Sample_005, Sampl...
lgl  (30): Sample_001, Sample_009, Sample_012, Sample_015, Sample_017, Sampl...

 Use `spec()` to retrieve the full column specification for this data.
 Specify the column types or set `show_col_types = FALSE` to quiet this message.
  1. 'protein groups'
  2. 'Sample_000'
  3. 'Sample_001'
  4. 'Sample_002'
  5. 'Sample_003'

Uncomment to test certain methods (only for debugging, as at least one method per package is tested using GitHub Actions)

Hide code cell source

# Debugging helper: uncomment to run a selection of methods on `df`.
# to_test <- c(
# 'ZERO',
# 'MINIMUM',
# 'COLMEDIAN',
# 'ROWMEDIAN',
# 'KNN_IMPUTE',
# 'SEQKNN',
# 'BPCA',
# 'SVDMETHOD',
# 'LLS',
# 'MLE',
# 'LLS',
# 'QRILC',
# 'MINDET',
# 'MINPROB',
# 'IRM',
# 'IMPSEQ',
# 'IMPSEQROB',
# 'MICE-NORM',
# 'MICE-CART',
# 'RF',
# 'PI',
# 'GMS', # fails to install on Windows
# 'TRKNN',
# 'MSIMPUTE',
# 'MSIMPUTE_MNAR',
# 'GSIMP'
# )

# for (method in to_test) {
#     print(method)
#     pred <- nafunctions(df, method)
# }

Impute and save predictions with original feature and column names

# Impute, then move the row names into a leading column because tibbles
# do not keep row names.
pred <- nafunctions(df, method)
pred <- cbind(rownames(pred), pred)
pred <- tibble::as_tibble(pred)
# Put the column names read from the CSV header back in place.
names(pred) <- original_header
pred
A tibble: 1421 × 211
protein groupsSample_000Sample_001Sample_002Sample_003Sample_004Sample_005Sample_006Sample_007Sample_008Sample_200Sample_201Sample_202Sample_203Sample_204Sample_205Sample_206Sample_207Sample_208Sample_209
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
A0A024QZX5;A0A087X1N8;P35237 15.9123711.4129616.1114916.1069715.6032115.8116115.4996615.2207515.9801312.4694115.8626415.6559415.4012612.9663415.6822515.7980415.7394915.4768212.16677
A0A024R0T9;K7ER74;P02655 16.8519416.8736913.8351217.0315115.3305118.6140717.4091717.6839116.3862517.3095516.6148017.9533918.1993817.2791116.8863517.5540013.8464116.7794817.26137
A0A024R3W6;A0A024R412;O60462;O60462-2;O60462-3;O60462-4;O60462-5;Q7LBX6;X5D2Q815.5704715.5189215.9353215.8018715.3752215.6242015.9118515.3849915.8944713.4521915.9317914.8587115.1245112.4308614.9100615.5996615.4689614.9949615.17487
A0A024R644;A0A0A0MRU5;A0A1B0GWI2;O75503 16.4810816.3869416.4157716.9786216.6790615.9576416.2338916.4179316.2713415.9540216.0295916.4975316.5128416.5126916.4823215.9378216.8977316.1324616.23505
A0A075B6H7 17.3013812.3781018.1748015.9632213.5129018.3172012.3994317.2139317.7940312.0055016.4826214.8346713.5689314.4825712.3360112.2739012.7896813.0758312.32100
A0A075B6H9 20.2464619.9411019.2509019.6279720.4498318.6856920.3472220.7960418.9952312.5122013.4711518.2698619.8006220.1831517.7048518.1545018.6363114.9075417.89344
A0A075B6I0 16.7644618.7862416.8321717.8520918.6818517.8654318.2499520.3093617.9784318.0055018.5414217.8074819.8986519.6742017.0394518.1517917.9504013.9032617.74443
A0A075B6I1 17.5835717.1438815.6707318.8774917.0813114.8236918.5196919.3564018.5644916.4140217.5051014.0191117.5057420.2511113.6882216.5026716.3207313.1504616.37054
A0A075B6I6 16.9877813.3090517.0118314.1815314.1402117.0417917.3756016.9979417.2031115.7254317.0675816.4584916.3277816.3337216.4130416.8600316.4007116.1186815.77958
A0A075B6I9 20.0541619.0673318.5690618.9850619.6856419.3758019.3988520.5435918.8439721.8526320.1465019.5345919.8671320.7662619.1021118.5379618.8489718.3677618.80581
A0A075B6J9 13.4115316.1879913.0215213.4381314.4948415.9087516.1843613.1203116.1701911.9920413.6047616.6319317.4357419.7781116.0642312.9466017.5804412.3359616.53239
A0A075B6K4 16.1485016.1272715.3867216.5650216.4177116.4549813.3287417.0467716.0321018.5816218.5173716.4835416.0633817.1912315.3503316.5817312.9614612.6174316.33787
A0A075B6K5 17.3426117.4172217.2363316.2671717.3902018.3360417.2176518.3248817.5245819.5156618.9890617.8061416.7481918.4125417.1538417.9018417.5189917.5022016.98874
A0A075B6P5;P01615 13.5541512.4781712.7198916.9896517.4933018.2692317.7898918.1533417.7870319.9243411.9082213.1223719.8413619.2662017.1752819.5750619.3058818.9765118.68810
A0A075B6Q5 13.9671012.8259715.1283613.8987113.8174613.6924012.4116916.7394912.3539311.4795117.0259816.1028715.8797212.4459713.2597612.6199916.8835513.6213215.68019
A0A075B6R2 12.0529013.3426216.2798116.7774417.4974417.0539616.9177019.7722216.7370517.9982218.1213119.1918412.9350617.2858916.5977717.0704416.0645912.5679016.03648
A0A075B6R9;A0A0C4DH68 19.5164019.3728413.2049319.0092319.5414719.5671920.1850720.5645419.5232420.1274920.3736019.4998318.9773920.5883218.0733718.8467319.6205419.3392620.39256
A0A075B6S2 15.5112416.2082414.8457015.2324213.0114716.2865314.6563213.1917314.5121215.0742315.9130416.6596315.8011717.4537814.1015316.6121316.2198415.6958216.38320
A0A075B6S5 13.4188113.7638514.2621212.1189612.6287615.9601616.1527717.0784216.8465915.5131516.1492911.5499715.3513315.9116115.6517016.0342413.4954711.5410611.96333
A0A075B6S9 16.5576517.5159717.9399916.9514717.0879517.3609916.4210319.7908017.1339917.2595820.5021819.2650718.4246818.1972516.1456319.5037318.4250117.7519016.21995
A0A075B7B8 14.9159412.2749314.6586513.8398115.3488614.5011115.1219614.5114215.3121812.5834011.6622112.0256813.1388611.8379312.1040113.7396413.9957311.6564912.55987
A0A075B7D0 18.3575413.1302618.2301819.2610319.7863719.6555118.5159320.5000819.5057213.0469719.2306613.6574218.0771119.4352819.0449817.0216318.0826218.7376717.31493
A0A087WSV8;V9HW75 15.6758714.5049213.5293515.5530115.5208712.4417415.2165914.6051713.8527013.6136115.5257615.2093415.5381413.9836015.1491715.8904413.0352315.4187214.79854
A0A087WSY4 14.2556317.1499315.6505212.7822011.7589512.4891811.4520013.6212011.9342015.8503112.2708916.2078115.3381311.4021215.1274911.6348212.7822211.7467520.34338
A0A087WSY5;Q96IY4 17.8632318.0413017.7766818.3417818.4288817.8513117.6895319.1298418.0186817.8611717.1273117.2878418.3573418.0921317.1149617.1954117.9442618.0847817.34736
A0A087WSY6 20.1168620.2745019.2808019.7819019.7265820.7958019.6323920.5975919.9661021.2815519.5135919.2447419.7366319.3754311.4622019.7985518.6509218.4342219.15982
A0A087WSZ0;A0A0G2JQJ0 16.6846312.7363911.0441013.1845712.5709513.0377512.4190115.2586915.5957813.9434615.9783514.3112012.0837217.3274714.8897215.6365813.7425916.4765213.66795
A0A087WTA1;A0A0A0MR20;E9PDN6;F5H107;Q9C0A0;Q9C0A0-2 17.5937718.0124017.8292118.2685017.5570617.3805117.3108216.4085917.3750616.9424917.7465512.6035517.6991611.5671818.0800017.4805717.8153518.1862417.91903
A0A087WTA8;P08123 17.6190818.2168517.5444517.5022517.8028816.9918417.9773917.7099417.5321317.5621916.5727916.9006611.9715312.4684616.9340216.7485613.5898117.1775717.26857
A0A087WTK0;A0A087WVC6;Q12913 15.5012115.3069615.8147115.5515615.3213116.0441516.1886313.4290215.5648613.2485015.7602915.2191112.5632811.0875314.1727915.3939615.2466514.1553711.51414
Q9UN70;Q9UN70-2 16.1573415.7245516.4318216.2305215.9563913.8315616.4384315.6314615.7212612.5607215.6839115.4991315.9858515.0297915.4857115.5279816.0801215.6316515.77433
Q9UNN8 17.5605617.3015917.7646411.2672317.1159118.1904217.9768217.0015917.0997316.7719513.4041517.7796717.5903017.3782116.4571316.4507517.8933217.6361817.39555
Q9UNW1 16.9599716.5129016.1473413.3021616.5081815.6471416.4181416.4868116.3143812.6426816.2176812.2232116.0672612.6865816.3173416.2100416.5703716.1786816.53072
Q9UP79 15.1531012.0219916.0362115.4160915.5027716.3383315.8980214.59940 9.9616812.3379913.9209615.0801512.2592115.2610415.7397713.9959112.8361716.1066412.55311
Q9UPU3 18.3254018.2302513.3996618.1004018.1773818.4628817.9007617.7512118.7432116.7844818.4704719.0300518.6485618.1108918.4219618.3693717.6405917.2421018.27864
Q9UQ52 15.3490115.5498915.7118713.7453715.6280915.5390215.4003114.5504515.0872415.8143715.6499415.6024613.6961515.0466615.7163716.1233015.6857115.8348516.18904
Q9UQM7 16.7798316.6355716.9280017.3646817.2613916.5729216.3412612.9995317.1852915.4493817.1391517.4659815.9544616.0165117.9082516.2232617.0897316.3884517.38407
Q9Y240 16.2570016.2715416.5778017.1333112.1925316.7610916.9081016.8798015.2639316.7483116.5384216.9056716.7234616.6139616.7144216.2557216.5080217.2861111.07577
Q9Y279;Q9Y279-2 17.1679417.4455617.3634517.2928917.2204716.9875417.1799813.1362117.1630415.0289316.5458916.5845016.5875517.6596316.6165616.7063312.1300111.5936816.95138
Q9Y281;Q9Y281-3 12.7222512.7273612.2318813.4976713.2664312.4135412.6675812.7359413.5995812.5553112.3006911.2070712.5499112.8669413.0103313.0766613.7754312.0860412.54090
Q9Y287 17.6628917.5881914.1006817.6961417.7354117.0309416.9508318.2668012.4027412.9786112.8776316.5786617.9707417.1072818.2760816.4771616.5435416.1737817.82544
Q9Y2I2;Q9Y2I2-1;Q9Y2I2-2;Q9Y2I2-4;Q9Y2I2-5;Q9Y2I2-616.9277617.2207417.4259517.2986117.8281616.7418116.7206915.9049916.3961116.2202616.7063517.8930417.7065917.0971812.3033816.9573018.3758717.5733317.93369
Q9Y2T3;Q9Y2T3-3 18.4676717.7211318.4401218.8656518.7322618.6387713.4839117.1155719.3320316.5274618.1635417.6509217.5202617.4169318.6247718.2610718.5141918.0065017.90217
Q9Y490 13.1643513.9619313.0562712.9727513.6269013.3812113.7419512.6008612.4262912.8517512.7442712.5616512.7957312.6507412.9323313.2944212.8862813.8408613.52534
Q9Y4L1 18.5979518.4762210.8848318.5596418.3046518.5237918.3602117.8747118.6725817.1867318.4140618.2671018.0855717.5979218.2899117.9772018.1487517.8805518.12515
Q9Y5F6;Q9Y5F6-2 16.4692915.7824013.0725616.5289916.2848016.8634415.8995316.1095217.4013215.4592616.4288416.1595015.6315614.7356915.9682116.8848115.8777215.5542416.57524
Q9Y5I4;Q9Y5I4-2 17.1870117.4471717.4104317.5451417.2966717.3972817.1453216.4048217.3230815.8973117.1348912.3084817.0858815.8738617.1035417.1090216.9384017.1550816.77629
Q9Y5Y7 18.8395719.1949519.0882018.7149718.6682419.0513319.0871118.8024518.7143917.1345217.8372718.7856218.6136718.7374718.7263918.4601119.5015112.3533412.24519
Q9Y617 16.8589716.7992716.2878217.0749014.0130216.4995216.7026415.9239816.6377514.5826314.9398815.8697015.7117715.4650715.8083115.0352916.2830615.9196015.71292
Q9Y646 19.3219619.1902919.7019419.7601319.6236919.2862819.6221919.0164419.1501219.8615620.2448419.2073120.3028119.4350719.8938820.0145720.3061420.2034220.04164
Q9Y653;Q9Y653-2;Q9Y653-3 16.0124315.5279415.2287115.4948414.7566815.0856315.7889014.0202015.6340813.0264714.9520115.5336115.0601415.6679413.8517215.4222815.8079615.1574015.23671
Q9Y696 15.1777815.5755314.7277014.5903412.8946114.8599516.1363214.9492815.2656811.7510315.8447915.8078416.1657415.9144815.6838716.1063316.0978016.7117615.65174
Q9Y6C2 12.6049112.2107113.7565814.6815712.1872911.4847312.5732314.3180512.1976310.5154611.9233513.9434911.9030614.2040714.2356112.6418914.4027211.3539615.21141
Q9Y6N6 15.0502114.8329015.1182815.1401815.2558814.6981313.1538013.6443515.2927114.7428215.1248215.2456815.2850215.0246115.4150415.3453615.7153214.6398314.20491
Q9Y6N7;Q9Y6N7-2;Q9Y6N7-4 16.8422116.5973917.4395217.3560817.0749517.0880316.7173512.7084817.4531911.0689717.0342516.7477916.3902013.5226217.5509217.0844712.7897716.5327616.74913
Q9Y6R7 13.6201020.2993319.5978519.4288019.5819819.1303318.6903018.9960519.9931819.2330418.8163618.9021218.8128211.5610517.9218518.7081418.7247819.4111319.27470
Q9Y6X5 12.9536015.5555815.7352212.5149315.3282813.2388112.8236913.3597112.8668215.8946915.5618916.0887715.6691115.0119316.3401612.5377816.1382715.8069015.73176
Q9Y6Y8;Q9Y6Y8-2 19.5628219.3857520.4473220.2159612.7070819.6331920.0568618.6804420.0232311.6552519.8094820.2043619.4182317.8474319.9276619.4326019.5991219.5450019.57720
Q9Y6Y9 12.4359313.9699712.6364112.7660313.1445311.8358012.8174112.8965812.9935212.7977911.9818712.7073812.9781212.2875012.9294112.9306313.6370512.7258211.04203
S4R3U6 12.8052112.4424512.5046612.4446111.8561013.9217913.8398710.8820413.0079313.2511110.1326512.0515610.4976910.5633512.7378912.3714011.1737112.6186611.79130

Transform predictions to long format

# Reshape the wide prediction table into long format, keeping the feature
# identifier column and stacking every other column into (variable, value).
pred <- reshape2::melt(pred, id.vars = feat_name)
colnames(pred) <- c(feat_name, 'Sample ID', method)
# Melt the NA mask of `df` the same way and keep only the rows whose mask
# entry is TRUE.
# NOTE(review): relies on the melted mask lining up row-for-row with the
# melted predictions -- confirm `df` and `pred` share the same layout.
na_mask <- reshape2::melt(is.na(df))[['value']]
pred <- pred[na_mask, ]
pred
A data.frame: 71601 × 3
protein groupsSample IDPI
<chr><fct><dbl>
11A0A075B6J9 Sample_00013.41153
14A0A075B6P5;P01615 Sample_00013.55415
15A0A075B6Q5 Sample_00013.96710
16A0A075B6R2 Sample_00012.05290
19A0A075B6S5 Sample_00013.41881
24A0A087WSY4 Sample_00014.25563
34A0A087WU43;A0A087WX17;A0A087WXI5;P12830;P12830-2 Sample_00012.09579
39A0A087WW87;A0A087X0Q4;P01614 Sample_00011.54184
40A0A087WWA5 Sample_00014.03446
42A0A087WWT2;Q9NPD7 Sample_00012.17343
43A0A087WX80;P24043 Sample_00012.20409
45A0A087WXE9;E9PQ70;Q6UXH9;Q6UXH9-2;Q6UXH9-3 Sample_00013.64749
48A0A087WYK9;Q02985;Q02985-2;Q6NSD3 Sample_00013.18781
52A0A087WZR4 Sample_00012.58107
53A0A087X089;Q16627;Q16627-2 Sample_00012.37590
56A0A087X0M8 Sample_00011.91548
59A0A087X117;A0A0G2JN29;J3KN36;P69849;Q15155;Q5JPE7;Q5JPE7-2 Sample_00013.34308
69A0A0A0MQS9;A0A0A0MTC7;Q16363;Q16363-2 Sample_00011.92651
78A0A0A0MS20;A0A0A0MSZ8;A0A0G2JM38;A0A0G2JM43;A0A0G2JM57;A0A0G2JM84;A0A0G2JMH7;A0A0G2JML1;A0A0G2JNE9;A0A0G2JNL1;A0A0G2JP25;A0A0G2JP84;A0A0G2JPA9;A0A0G2JPC7;A0A0G2JPU4;A0A0G2JPX5;A0A0G2JQ10;A0A0G2JQ20;A8MUE1;C9JST2;Q8NHJ6;Q8NHJ6-2;Q8NHJ6-3Sample_00013.22806
85A0A0A0MT32;P38571;P38571-2 Sample_00012.39764
87A0A0A0MT66 Sample_00013.70503
90A0A0A0MTC8;Q9NQ36;Q9NQ36-2;Q9NQ36-3 Sample_00013.50943
96A0A0A6YY99;O43508-2 Sample_00012.15219
101A0A0B4J1V2 Sample_00012.69792
102A0A0B4J1V6 Sample_00012.90929
105A0A0B4J1Z1;C9JAB2;Q16629;Q16629-2;Q16629-3;Q16629-4 Sample_00012.19075
108A0A0B4J2B5;S4R460 Sample_00013.90198
109A0A0B4J2C3;P13693;Q5W0H4 Sample_00013.78604
111A0A0C4DFP6;Q9NQ79;Q9NQ79-2 Sample_00013.45485
113A0A0C4DG76;Q5JZ08;Q9BQS7;Q9BQS7-2;Q9BQS7-3;Q9BQS7-4 Sample_00012.66149
298242Q96PQ0 Sample_20913.76743
298244Q96RW7;Q96RW7-2 Sample_20912.77663
298246Q96SM3 Sample_20912.74675
298249Q99538 Sample_20912.31367
298252Q99650;Q99650-2 Sample_20913.26045
298261Q9BQT9;Q9BQT9-2 Sample_20912.64939
298266Q9BT88 Sample_20911.54715
298273Q9BX67 Sample_20911.81845
298277Q9BXP8 Sample_20912.57196
298287Q9H3T2;Q9H3T2-3 Sample_20911.81209
298288Q9H3T3;Q9H3T3-3 Sample_20911.99796
298289Q9H492;Q9H492-2 Sample_20912.77538
298297Q9H8L6 Sample_20913.06893
298299Q9HBR0 Sample_20912.40398
298302Q9HC38-2 Sample_20911.65559
298303Q9HC57 Sample_20912.20477
298310Q9NPH3;Q9NPH3-2;Q9NPH3-5Sample_20913.76125
298314Q9NRB3 Sample_20912.27028
298316Q9NS00;Q9NS00-2 Sample_20912.40202
298317Q9NS84 Sample_20912.46862
298328Q9NY97;Q9NY97-2 Sample_20913.39984
298341Q9P1W8;Q9P1W8-2;Q9P1W8-4Sample_20912.53982
298349Q9UGM5;Q9UGM5-2 Sample_20913.37656
298355Q9UI40;Q9UI40-2 Sample_20911.81050
298357Q9UIW2 Sample_20912.70399
298358Q9UJ14 Sample_20913.76294
298377Q9UMX0;Q9UMX0-2;Q9UMX0-4Sample_20911.62637
298384Q9UP79 Sample_20912.55311
298388Q9Y240 Sample_20911.07577
298398Q9Y5Y7 Sample_20912.24519

Check dimension of long format dataframe

Hide code cell source

# Number of rows and columns of the long-format predictions
c(nrow(pred), ncol(pred))
  1. 71601
  2. 3

Save predictions to disk

Hide code cell source

# Build the output path <folder_experiment>/preds/pred_all_<METHOD>.csv
fname <- file.path(folder_experiment,
                   'preds',
                   paste0('pred_all_', toupper(method), '.csv'))
# Make sure the target folder exists before writing.
dir.create(dirname(fname), recursive = TRUE, showWarnings = FALSE)
# `path` is deprecated as of readr 1.4.0; `file` is the supported argument.
write_csv(pred, file = fname)
# Show the path that was written
fname
Warning message:
“The `path` argument of `write_csv()` is deprecated as of readr 1.4.0.
 Please use the `file` argument instead.”
'runs/alzheimer_study/preds/pred_all_PI.csv'