NAGuide R methods

NAGuide R methods#

Setup basic methods and packages used for all methods

  • BiocManager could be moved to the methods which are installed from Bioconductor

Hide code cell source

# options("install.lock"=FALSE)

# Packages that are installed (when missing) and attached up front.
packages_base_R <- c(
  "BiocManager",
  "reshape2",
  "data.table",
  "readr",
  "tibble"
)

#' Attach a package, installing it with `install.packages()` when missing.
#'
#' @param pkg Name of the package as a single character string.
#' @return `NULL`, invisibly. Called for its side effect of attaching `pkg`.
install_rpackage <- function(pkg) {
  # requireNamespace() only probes for the package, so a missing package no
  # longer triggers the warning require() emits right before it is installed.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # library() raises an error if the package is still unavailable.
  library(pkg, character.only = TRUE)
  invisible(NULL)
}

#' Attach a package, installing it with `BiocManager::install()` when missing.
#'
#' Used by the imputation function for the packages it installs through
#' BiocManager (impute, pcaMethods and msImpute).
#'
#' @param pkg Name of the package as a single character string.
#' @return `NULL`, invisibly. Called for its side effect of attaching `pkg`.
install_bioconductor <- function(pkg) {
  # requireNamespace() only probes for the package, so a missing package no
  # longer triggers the warning require() emits right before it is installed.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    BiocManager::install(pkg)
  }
  # library() raises an error if the package is still unavailable.
  library(pkg, character.only = TRUE)
  invisible(NULL)
}


# Make sure every package listed in packages_base_R is installed and attached.
for (package in packages_base_R) {
  install_rpackage(package)
}
Loading required package: BiocManager
Loading required package: reshape2
Loading required package: data.table

Attaching package: ‘data.table’
The following objects are masked from ‘package:reshape2’:

    dcast, melt
Loading required package: readr
Loading required package: tibble

The setup can be tricky… we try to integrate as much as possible into the conda environment.

Copied from NAGuideR’s github RShiny application. Adapted to run as standalone function in context of the Snakemake workflow.

  • df and df1 ?

  • seems quite hacky

  • code is only slightly adapted from repo to run here, mainly to install packages on the fly

Hide code cell source

#' Impute missing values with one of the NAGuideR imputation methods.
#'
#' Packages needed by the selected method are installed on the fly through
#' `install_rpackage()` / `install_bioconductor()`, or from the tarballs and
#' scripts below `src/R_NAGuideR/` (paths are relative to the working
#' directory).
#'
#' @param x Table with missing values; it is converted with `as.data.frame()`.
#'   The "gsimp" branch transposes it so that samples are in rows, i.e. it
#'   expects features in rows and samples in columns.
#' @param method Single method name, matched case-insensitively. One of
#'   "zero", "minimum", "colmedian", "rowmedian", "knn_impute", "seqknn",
#'   "bpca", "svdmethod", "lls", "mle", "qrilc", "mindet", "minprob", "irm",
#'   "impseq", "impseqrob", "mice-norm", "mice-cart", "trknn", "rf", "pi",
#'   "gms", "msimpute", "msimpute_mnar" or "gsimp".
#' @return A data.frame holding the imputed values.
nafunctions <- function(x, method = "zero") {
  df <- df1 <- as.data.frame(x)
  method <- tolower(method)
  # Fail early with a readable message instead of the obscure error `if`
  # raises for a vector-valued or missing condition.
  if (length(method) != 1L || is.na(method)) {
    stop("`method` must be a single, non-missing method name.")
  }

  # Attach imputeLCMD together with the packages installed alongside it.
  attach_imputelcmd <- function() {
    install_bioconductor("impute")
    install_bioconductor("pcaMethods")
    install_rpackage("gmm")
    install_rpackage("imputeLCMD")
  }

  # Attach a package shipped as a source tarball, installing it when missing.
  attach_local_package <- function(pkg, tarball) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(tarball, repos = NULL, type = "source")
    }
    library(pkg, character.only = TRUE)
  }

  # Run mice with `n_imputations` imputations and average the completed tables.
  impute_with_mice <- function(data, mice_method, n_imputations = 5) {
    install_rpackage("mice")
    imputations <- mice(data,
                        m = n_imputations,
                        seed = 1234,
                        method = mice_method)
    total <- 0
    for (i in seq_len(n_imputations)) {
      # Namespace-qualified so that another attached `complete()` cannot
      # mask the mice version.
      total <- mice::complete(imputations, action = i) + total
    }
    averaged <- total / n_imputations
    rownames(averaged) <- rownames(data)
    averaged
  }

  if (method == "zero") {
    df[is.na(df)] <- 0
  }
  else if (method == "minimum") {
    # Global minimum over all observed values of the table.
    df[is.na(df)] <- min(df1, na.rm = TRUE)
  }
  else if (method == "colmedian") {
    install_rpackage("e1071")
    # Namespace-qualified so that another attached `impute()` cannot mask it.
    df <- e1071::impute(df1, what = "median")
  }
  else if (method == "rowmedian") {
    install_rpackage("e1071")
    df <- t(e1071::impute(t(df1), what = "median"))
  }
  else if (method == "knn_impute") {
    install_bioconductor("impute")
    data_zero1 <-
      impute.knn(as.matrix(df1),
                 k = 10,
                 rowmax = 1,
                 colmax = 1) # rowmax = 0.9, colmax = 0.9
    df <- data_zero1$data
  }
  else if (method == "seqknn") {
    attach_local_package("SeqKnn", "src/R_NAGuideR/SeqKnn_1.0.1.tar.gz")
    df <- SeqKNN(df1, k = 10)
  }
  else if (method == "bpca") {
    install_bioconductor("pcaMethods")
    data_zero1 <-
      pcaMethods::pca(
        as.matrix(df1),
        nPcs = ncol(df1) - 1,
        method = "bpca",
        maxSteps = 100
      )
    df <- completeObs(data_zero1)
  }
  else if (method == "svdmethod") {
    install_bioconductor("pcaMethods")
    data_zero1 <-
      pcaMethods::pca(as.matrix(df1),
                      nPcs = ncol(df1) - 1,
                      method = "svdImpute")
    df <- completeObs(data_zero1)
  }
  else if (method == "lls") {
    install_bioconductor("pcaMethods")
    data_zero1 <- llsImpute(t(df1), k = 10)
    df <- t(completeObs(data_zero1))
  }
  else if (method == "mle") {
    install_rpackage("norm")
    xxm <- as.matrix(df1)
    ss <- norm::prelim.norm(xxm)
    thx <- norm::em.norm(ss)
    norm::rngseed(123)
    df <- norm::imp.norm(ss, thx, xxm)
  }
  else if (method == "qrilc") {
    attach_imputelcmd()
    xxm <- t(df1)
    data_zero1 <-
      imputeLCMD::impute.QRILC(xxm, tune.sigma = 1)[[1]]
    df <- t(data_zero1)
  }
  else if (method == "mindet") {
    attach_imputelcmd()
    xxm <- as.matrix(df1)
    df <- imputeLCMD::impute.MinDet(xxm, q = 0.01)
  }
  else if (method == "minprob") {
    attach_imputelcmd()
    xxm <- as.matrix(df1)
    df <-
      imputeLCMD::impute.MinProb(xxm, q = 0.01, tune.sigma = 1)
  }
  else if (method == "irm") {
    install_rpackage("VIM")
    df <- irmi(df1, trace = TRUE, imp_var = FALSE)
    rownames(df) <- rownames(df1)
  }
  else if (method == "impseq") {
    install_rpackage("rrcovNA")
    df <- impSeq(df1)
  }
  else if (method == "impseqrob") {
    install_rpackage("rrcovNA")
    data_zero1 <- impSeqRob(df1, alpha = 0.9)
    df <- data_zero1$x
  }
  else if (method == "mice-norm") {
    df <- impute_with_mice(df1, mice_method = "norm")
  }
  else if (method == "mice-cart") {
    df <- impute_with_mice(df1, mice_method = "cart")
  }
  else if (method == "trknn") {
    source("src/R_NAGuideR/Imput_funcs.r")
    # sim_trKNN_wrapper <- function(data) {
    #   result <- data %>% as.matrix %>% t %>% imputeKNN(., k=10, distance='truncation', perc=0) %>% t
    #   return(result)
    # }
    # df1x <- sim_trKNN_wrapper(t(df1))
    # df<-as.data.frame(t(df1x))
    df <-
      imputeKNN(as.matrix(df),
                k = 10,
                distance = "truncation",
                perc = 0)
    df <- as.data.frame(df)
  }
  else if (method == "rf") {
    install_rpackage("missForest")
    data_zero1 <- missForest(
      t(df1),
      maxiter = 10,
      ntree = 20, # input$rfntrees
      mtry = floor(nrow(df1) ^ (1 / 3)),
      verbose = TRUE
    )
    df <- t(data_zero1$ximp)
  }
  else if (method == "pi") {
    width <- 0.3 # input$piwidth
    downshift <- 1.8 # input$pidownshift
    # Per column: draw replacements from a normal distribution that is shifted
    # down by `downshift` and narrowed to `width` observed standard deviations.
    # seq_len() keeps a table without columns from entering the loop.
    for (i in seq_len(ncol(df1))) {
      column <- df1[[i]]
      is_missing <- is.na(column)
      if (any(is_missing)) {
        observed_sd <- sd(column[!is_missing])
        observed_mean <- mean(column[!is_missing])
        column[is_missing] <- rnorm(
          sum(is_missing),
          mean = observed_mean - downshift * observed_sd,
          sd = width * observed_sd
        )
        df[[i]] <- column
      }
    }
  }
  # else if(method=="grr"){
  #   library(DreamAI)
  #   df<-impute.RegImpute(data=as.matrix(df1), fillmethod = "row_mean", maxiter_RegImpute = 10,conv_nrmse = 1e-03)
  # }
  else if (method == "gms") {
    # install.packages('GMSimpute')
    attach_local_package("GMSimpute",
                         "src/R_NAGuideR/GMSimpute_0.0.1.1.tar.gz")
    df <- GMS.Lasso(df1,
                    nfolds = 3,
                    log.scale = FALSE,
                    TS.Lasso = TRUE)
  }
  else if (method == "msimpute") {
    install_bioconductor("msImpute")
    df <- msImpute(as.matrix(df),
                   method = "v2")
    df <- as.data.frame(df)
  }
  else if (method == "msimpute_mnar") {
    install_bioconductor("msImpute")
    df <-
      msImpute(as.matrix(df),
               method = "v2-mnar",
               group = rep(1, dim(df)[2]))
    df <- as.data.frame(df)
  }
  else if (method == "gsimp") {
    # Set the option for the duration of this call only and restore the
    # caller's value on exit.
    old_options <- options(stringsAsFactors = FALSE)
    on.exit(options(old_options), add = TRUE)

    # dependencies, partly for the sourced file
    attach_imputelcmd()
    install_rpackage("magrittr")
    install_rpackage("glmnet")
    install_rpackage("abind")
    install_rpackage("foreach")
    install_rpackage("doParallel")
    source("src/R_NAGuideR/GSimp.R")

    # wrapper function with data pre-processing
    pre_processing_GS_wrapper <- function(data_raw_log) {
      # samples in rows, features in columns #
      # Initialization #
      data_raw_log_qrilc <- as.data.frame(data_raw_log) %>%
        impute.QRILC() %>% extract2(1)
      # Centralization and scaling #
      data_raw_log_qrilc_sc <-
        scale_recover(data_raw_log_qrilc, method = "scale")
      # Data after centralization and scaling #
      data_raw_log_qrilc_sc_df <- data_raw_log_qrilc_sc[[1]]
      # Parameters for centralization and scaling (for scaling recovery) #
      data_raw_log_qrilc_sc_df_param <- data_raw_log_qrilc_sc[[2]]
      # NA position #
      NA_pos <- which(is.na(data_raw_log), arr.ind = TRUE)
      # NA introduced to log-scaled-initialized data #
      data_raw_log_sc <- data_raw_log_qrilc_sc_df
      data_raw_log_sc[NA_pos] <- NA
      # Feed initialized and missing data into GSimp imputation #
      result <-
        data_raw_log_sc %>% GS_impute(
          .,
          iters_each = 50,
          iters_all = 10,
          initial = data_raw_log_qrilc_sc_df,
          lo = -Inf,
          hi = "min",
          n_cores = 1,
          imp_model = "glmnet_pred"
        )
      data_imp_log_sc <- result$data_imp
      # Data recovery #
      data_imp <- data_imp_log_sc %>%
        scale_recover(., method = "recover",
                      param_df = data_raw_log_qrilc_sc_df_param) %>%
        extract2(1)
      return(data_imp)
    }
    df <- t(df) # samples in rows, feature in columns
    df <- pre_processing_GS_wrapper(df)
    df <- t(df) # features in rows, samples in columns
  }
  else {
    stop(paste("Unsupported methods so far: ", method))
  }
  # Several branches produce a matrix; always hand back a data.frame.
  as.data.frame(df)
}

Parameters#

Choose one of the available methods. Some methods might fail for your dataset for unknown reasons (and the error won’t always be easy to understand)

method = 'ZERO'
method = 'MINIMUM'
method = 'COLMEDIAN'
method = 'ROWMEDIAN'
method = 'KNN_IMPUTE'
method = 'SEQKNN'
method = 'BPCA'
method = 'SVDMETHOD'
method = 'LLS'
method = 'MLE'
method = 'LLS'
method = 'QRILC'
method = 'MINDET'
method = 'MINPROB'
method = 'IRM'
method = 'IMPSEQ'
method = 'IMPSEQROB'
method = 'MICE-NORM'
method = 'MICE-CART'
method = 'RF'
method = 'PI'
method = 'GMS'
method = 'TRKNN'
method = 'MSIMPUTE'
method = 'MSIMPUTE_MNAR'
method = 'GSIMP'
# Default parameters pointing at the example data (test).
train_split <- "runs/example/data/data_wide_sample_cols.csv"
folder_experiment <- "runs/example/"
method <- "KNN_IMPUTE"
# Parameters
# These assignments run after the defaults above and therefore override them.
train_split <- "runs/alzheimer_study/data/data_wide_sample_cols.csv"
method <- "IMPSEQROB"
folder_experiment <- "runs/alzheimer_study"

Dump predictions#

# Load the training split; the first CSV column supplies the row names.
df <- utils::read.csv(
  file = train_split,
  header = TRUE,
  row.names = 1,
  stringsAsFactors = FALSE
)
df
A data.frame: 1421 × 210
Sample_000Sample_001Sample_002Sample_003Sample_004Sample_005Sample_006Sample_007Sample_008Sample_009Sample_200Sample_201Sample_202Sample_203Sample_204Sample_205Sample_206Sample_207Sample_208Sample_209
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
A0A024QZX5;A0A087X1N8;P3523715.91237 NA16.1114916.1069715.6032115.8116115.4996615.2207515.98013 NA NA15.8626415.6559415.40126 NA15.6822515.7980415.7394915.47682 NA
A0A024R0T9;K7ER74;P0265516.8519416.87369 NA17.0315115.3305118.6140717.4091717.6839116.3862516.5897217.3095516.6148017.9533918.1993817.2791116.8863517.55400 NA16.7794817.26137
A0A024R3W6;A0A024R412;O60462;O60462-2;O60462-3;O60462-4;O60462-5;Q7LBX6;X5D2Q815.5704715.5189215.9353215.8018715.3752215.6242015.9118515.3849915.8944715.37538 NA15.9317914.8587115.12451 NA14.9100615.5996615.4689614.9949615.17487
A0A024R644;A0A0A0MRU5;A0A1B0GWI2;O7550316.4810816.3869416.4157716.9786216.6790615.9576416.2338916.4179316.2713416.2097015.9540216.0295916.4975316.5128416.5126916.4823215.9378216.8977316.1324616.23505
A0A075B6H717.30138 NA18.1748015.96322 NA18.31720 NA17.2139317.79403 NA NA16.4826214.8346713.5689314.48257 NA NA NA NA NA
A0A075B6H920.2464619.9411019.2509019.6279720.4498318.6856920.3472220.7960418.9952319.24667 NA NA18.2698619.8006220.1831517.7048518.1545018.6363114.9075417.89344
A0A075B6I016.7644618.7862416.8321717.8520918.6818517.8654318.2499520.3093617.9784319.4104718.0055018.5414217.8074819.8986519.6742017.0394518.1517917.95040 NA17.74443
A0A075B6I117.5835717.1438815.6707318.8774917.0813114.8236918.5196919.3564018.5644917.9631016.4140217.5051014.0191117.5057420.25111 NA16.5026716.32073 NA16.37054
A0A075B6I616.98778 NA17.0118314.1815314.1402117.0417917.3756016.9979417.2031116.7583915.7254317.0675816.4584916.3277816.3337216.4130416.8600316.4007116.1186815.77958
A0A075B6I920.0541619.0673318.5690618.9850619.6856419.3758019.3988520.5435918.8439718.8997521.8526320.1465019.5345919.8671320.7662619.1021118.5379618.8489718.3677618.80581
A0A075B6J9 NA16.18799 NA13.4381314.4948415.9087516.18436 NA16.1701916.32362 NA NA16.6319317.4357419.7781116.06423 NA17.58044 NA16.53239
A0A075B6K416.1485016.1272715.3867216.5650216.4177116.45498 NA17.0467716.0321016.1490818.5816218.5173716.4835416.0633817.1912315.3503316.58173 NA NA16.33787
A0A075B6K517.3426117.4172217.2363316.2671717.3902018.3360417.2176518.3248817.5245818.7760819.5156618.9890617.8061416.7481918.4125417.1538417.9018417.5189917.5022016.98874
A0A075B6P5;P01615 NA NA NA16.9896517.4933018.2692317.7898918.1533417.7870318.3521319.92434 NA NA19.8413619.2662017.1752819.5750619.3058818.9765118.68810
A0A075B6Q5 NA NA15.12836 NA NA NA NA16.73949 NA NA NA17.0259816.1028715.87972 NA NA NA16.88355 NA15.68019
A0A075B6R2 NA NA16.2798116.7774417.4974417.0539616.9177019.7722216.7370517.9567017.9982218.1213119.19184 NA17.2858916.5977717.0704416.06459 NA16.03648
A0A075B6R9;A0A0C4DH6819.5164019.37284 NA19.0092319.5414719.5671920.1850720.5645419.5232419.3871620.1274920.3736019.4998318.9773920.5883218.0733718.8467319.6205419.3392620.39256
A0A075B6S215.5112416.2082414.8457015.23242 NA16.2865314.65632 NA14.5121216.5692515.0742315.9130416.6596315.8011717.4537814.1015316.6121316.2198415.6958216.38320
A0A075B6S5 NA NA NA NA NA15.9601616.1527717.0784216.8465916.3338515.5131516.14929 NA15.3513315.9116115.6517016.03424 NA NA NA
A0A075B6S916.5576517.5159717.9399916.9514717.0879517.3609916.4210319.7908017.1339915.4731717.2595820.5021819.2650718.4246818.1972516.1456319.5037318.4250117.7519016.21995
A0A075B7B814.91594 NA14.65865 NA15.3488614.5011115.1219614.5114215.31218 NA NA NA NA NA NA NA NA NA NA NA
A0A075B7D018.35754 NA18.2301819.2610319.7863719.6555118.5159320.5000819.5057220.00935 NA19.23066 NA18.0771119.4352819.0449817.0216318.0826218.7376717.31493
A0A087WSV8;V9HW7515.6758714.50492 NA15.5530115.52087 NA15.21659 NA13.85270 NA NA15.5257615.2093415.5381413.9836015.1491715.89044 NA15.4187214.79854
A0A087WSY4 NA17.1499315.65052 NA NA NA NA NA NA NA15.85031 NA16.2078115.33813 NA15.12749 NA NA NA20.34338
A0A087WSY5;Q96IY417.8632318.0413017.7766818.3417818.4288817.8513117.6895319.1298418.0186818.2319617.8611717.1273117.2878418.3573418.0921317.1149617.1954117.9442618.0847817.34736
A0A087WSY620.1168620.2745019.2808019.7819019.7265820.7958019.6323920.5975919.9661018.7243921.2815519.5135919.2447419.7366319.37543 NA19.7985518.6509218.4342219.15982
A0A087WSZ0;A0A0G2JQJ016.68463 NA NA NA NA13.03775 NA15.2586915.5957813.63975 NA15.9783514.31120 NA17.3274714.8897215.6365813.7425916.4765213.66795
A0A087WTA1;A0A0A0MR20;E9PDN6;F5H107;Q9C0A0;Q9C0A0-217.5937718.0124017.8292118.2685017.5570617.3805117.3108216.4085917.3750618.0290816.9424917.74655 NA17.69916 NA18.0800017.4805717.8153518.1862417.91903
A0A087WTA8;P0812317.6190818.2168517.5444517.5022517.8028816.9918417.9773917.7099417.5321317.3504517.5621916.5727916.90066 NA NA16.9340216.74856 NA17.1775717.26857
A0A087WTK0;A0A087WVC6;Q1291315.5012115.3069615.8147115.5515615.3213116.0441516.18863 NA15.5648615.37830 NA15.7602915.21911 NA NA NA15.3939615.24665 NA NA
Q9UN70;Q9UN70-216.1573415.7245516.4318216.2305215.95639 NA16.4384315.6314615.7212615.74455 NA15.6839115.4991315.9858515.0297915.4857115.5279816.0801215.6316515.77433
Q9UNN817.5605617.3015917.76464 NA17.1159118.1904217.9768217.0015917.09973 NA16.77195 NA17.7796717.5903017.3782116.4571316.4507517.8933217.6361817.39555
Q9UNW116.9599716.5129016.14734 NA16.5081815.6471416.4181416.4868116.3143817.10669 NA16.21768 NA16.06726 NA16.3173416.2100416.5703716.1786816.53072
Q9UP7915.15310 NA16.0362115.4160915.5027716.3383315.8980214.59940 NA15.94205 NA NA15.08015 NA15.2610415.73977 NA NA16.10664 NA
Q9UPU318.3254018.23025 NA18.1004018.1773818.4628817.9007617.7512118.7432117.7462416.7844818.4704719.0300518.6485618.1108918.4219618.3693717.6405917.2421018.27864
Q9UQ5215.3490115.5498915.71187 NA15.6280915.5390215.4003114.5504515.0872415.2123315.8143715.6499415.60246 NA15.0466615.7163716.1233015.6857115.8348516.18904
Q9UQM716.7798316.6355716.9280017.3646817.2613916.5729216.34126 NA17.18529 NA15.4493817.1391517.4659815.9544616.0165117.9082516.2232617.0897316.3884517.38407
Q9Y24016.2570016.2715416.5778017.13331 NA16.7610916.9081016.8798015.2639316.6680716.7483116.5384216.9056716.7234616.6139616.7144216.2557216.5080217.28611 NA
Q9Y279;Q9Y279-217.1679417.4455617.3634517.2928917.2204716.9875417.17998 NA17.1630416.5564115.0289316.5458916.5845016.5875517.6596316.6165616.70633 NA NA16.95138
Q9Y281;Q9Y281-3 NA NA NA NA NA NA NA NA NA NA NA12.30069 NA NA NA13.0103313.07666 NA NA12.54090
Q9Y28717.6628917.58819 NA17.6961417.7354117.0309416.9508318.26680 NA16.89611 NA NA16.5786617.9707417.1072818.2760816.4771616.5435416.1737817.82544
Q9Y2I2;Q9Y2I2-1;Q9Y2I2-2;Q9Y2I2-4;Q9Y2I2-5;Q9Y2I2-616.9277617.2207417.4259517.2986117.8281616.7418116.7206915.9049916.3961117.3693516.2202616.7063517.8930417.7065917.09718 NA16.9573018.3758717.5733317.93369
Q9Y2T3;Q9Y2T3-318.4676717.7211318.4401218.8656518.7322618.63877 NA17.1155719.3320318.0933816.5274618.1635417.6509217.5202617.4169318.6247718.2610718.5141918.0065017.90217
Q9Y490 NA13.96193 NA NA13.6269013.3812113.74195 NA NA13.37095 NA NA NA NA NA12.9323313.29442 NA13.8408613.52534
Q9Y4L118.5979518.47622 NA18.5596418.3046518.5237918.3602117.8747118.6725818.4313117.1867318.4140618.2671018.0855717.5979218.2899117.9772018.1487517.8805518.12515
Q9Y5F6;Q9Y5F6-216.4692915.78240 NA16.5289916.2848016.8634415.8995316.1095217.4013216.3436715.4592616.4288416.1595015.6315614.7356915.9682116.8848115.8777215.5542416.57524
Q9Y5I4;Q9Y5I4-217.1870117.4471717.4104317.5451417.2966717.3972817.1453216.4048217.3230817.4575315.8973117.13489 NA17.0858815.8738617.1035417.1090216.9384017.1550816.77629
Q9Y5Y718.8395719.1949519.0882018.7149718.6682419.0513319.0871118.8024518.7143918.7874517.1345217.8372718.7856218.6136718.7374718.7263918.4601119.50151 NA NA
Q9Y61716.8589716.7992716.2878217.07490 NA16.4995216.7026415.9239816.6377516.4427714.5826314.9398815.8697015.7117715.4650715.8083115.0352916.2830615.9196015.71292
Q9Y64619.3219619.1902919.7019419.7601319.6236919.2862819.6221919.0164419.1501219.5165219.8615620.2448419.2073120.3028119.4350719.8938820.0145720.3061420.2034220.04164
Q9Y653;Q9Y653-2;Q9Y653-316.0124315.5279415.2287115.4948414.7566815.0856315.78890 NA15.63408 NA NA14.9520115.5336115.0601415.66794 NA15.4222815.8079615.1574015.23671
Q9Y69615.1777815.5755314.7277014.59034 NA14.8599516.1363214.9492815.2656814.63326 NA15.8447915.8078416.1657415.9144815.6838716.1063316.0978016.7117615.65174
Q9Y6C2 NA NA13.7565814.68157 NA NA NA14.31805 NA NA NA NA NA NA14.2040714.23561 NA14.40272 NA15.21141
Q9Y6N615.0502114.8329015.1182815.1401815.2558814.69813 NA NA15.29271 NA14.7428215.1248215.2456815.2850215.0246115.4150415.3453615.7153214.6398314.20491
Q9Y6N7;Q9Y6N7-2;Q9Y6N7-416.8422116.5973917.4395217.3560817.0749517.0880316.71735 NA17.4531916.91617 NA17.0342516.7477916.39020 NA17.5509217.08447 NA16.5327616.74913
Q9Y6R7 NA20.2993319.5978519.4288019.5819819.1303318.6903018.9960519.9931818.5503419.2330418.8163618.9021218.81282 NA17.9218518.7081418.7247819.4111319.27470
Q9Y6X5 NA15.5555815.73522 NA15.32828 NA NA NA NA NA15.8946915.5618916.0887715.6691115.0119316.34016 NA16.1382715.8069015.73176
Q9Y6Y8;Q9Y6Y8-219.5628219.3857520.4473220.21596 NA19.6331920.0568618.6804420.0232319.94839 NA19.8094820.2043619.4182317.8474319.9276619.4326019.5991219.5450019.57720
Q9Y6Y9 NA13.9699712.63641 NA13.14453 NA12.8174112.89658 NA13.68470 NA NA12.7073812.9781212.2875012.92941 NA13.63705 NA11.04203
S4R3U612.8052112.4424512.5046612.44461 NA NA NA NA13.00793 NA NA10.13265 NA10.4976910.56335 NA NA11.17371 NA11.79130
  • data.frame does not allow arbitrary column names, but only valid column names…

  • tibbles don’t support rownames, and the imputation methods rely on normal data.frames. Save the header row for later use.

# Read a single row of the CSV just to recover the column names as readr
# reports them; the values themselves are discarded.
original_header <- colnames(readr::read_csv(
  train_split,
  n_max = 1,
  col_names = TRUE,
  skip = 0,
  # Only the names are used, so the column-specification message is noise.
  show_col_types = FALSE
))
# The first header entry names the feature (row identifier) column.
feat_name <- original_header[1]
# head() does not pad with NA when the file has fewer than five columns.
head(original_header, 5)
Rows: 1 Columns: 211
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr   (1): protein groups
dbl (180): Sample_000, Sample_002, Sample_003, Sample_004, Sample_005, Sampl...
lgl  (30): Sample_001, Sample_009, Sample_012, Sample_015, Sample_017, Sampl...

 Use `spec()` to retrieve the full column specification for this data.
 Specify the column types or set `show_col_types = FALSE` to quiet this message.
  1. 'protein groups'
  2. 'Sample_000'
  3. 'Sample_001'
  4. 'Sample_002'
  5. 'Sample_003'

Uncomment to test certain methods (only for debugging, as at least one method per package is tested using GitHub Actions)

Hide code cell source

# to_test <- c(
# 'ZERO',
# 'MINIMUM',
# 'COLMEDIAN',
# 'ROWMEDIAN',
# 'KNN_IMPUTE',
# 'SEQKNN',
# 'BPCA',
# 'SVDMETHOD',
# 'LLS',
# 'MLE',
# 'LLS',
# 'QRILC',
# 'MINDET',
# 'MINPROB',
# 'IRM',
# 'IMPSEQ',
# 'IMPSEQROB',
# 'MICE-NORM',
# 'MICE-CART',
# 'RF',
# 'PI',
# 'GMS', # fails to install on Windows
# 'TRKNN',
# 'MSIMPUTE',
# 'MSIMPUTE_MNAR',
# 'GSIMP'
# )

# for (method in to_test) {
#     print(method)
#     pred <- nafunctions(df, method)
# }

Impute and save predictions with original feature and column names

# Impute with the selected method, then move the row names into a leading
# column before converting to a tibble and restoring the header read from the
# CSV file.
pred <- nafunctions(df, method)
pred <- cbind(rownames(pred), pred)
pred <- tibble::as_tibble(pred)
names(pred) <- original_header
pred
Not enough complete observations: only  0 complete, needed atleast  234 ...

Trying to impute the minimum necessary complete observations using the multivariate normal model ...
Loading required package: rrcovNA
Loading required package: rrcov
Loading required package: robustbase
Scalable Robust Estimators with High Breakdown Point (version 1.7-7)
Scalable Robust Estimators with High Breakdown Point for
Incomplete Data (version 0.5-3)
Warning message in prelim.norm(x):
“NAs introduced by coercion to integer range”
A tibble: 1421 × 211
protein groupsSample_000Sample_001Sample_002Sample_003Sample_004Sample_005Sample_006Sample_007Sample_008Sample_200Sample_201Sample_202Sample_203Sample_204Sample_205Sample_206Sample_207Sample_208Sample_209
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
A0A024QZX5;A0A087X1N8;P35237 15.912372 24.015395 16.111492 16.1069686 15.60321415.8116115.499663 15.2207515.98013 93.012460 15.86264 15.65594 15.4012577 87.83193 15.68225315.798040 15.7394864 15.47682313.483870
A0A024R0T9;K7ER74;P02655 16.851936 16.8736922448.502994 17.0315136 15.33050718.6140717.409166 17.6839116.38625 17.309554 16.61480 17.95339 18.1993788 17.27911 16.88635017.5540032466.8702636 16.77948417.261372
A0A024R3W6;A0A024R412;O60462;O60462-2;O60462-3;O60462-4;O60462-5;Q7LBX6;X5D2Q8 15.570475 15.518923 15.935318 15.8018702 15.37522315.6242015.911849 15.3849915.89447 20.950428 15.93179 14.85871 15.1245137 73.71449 14.91006115.599656 15.4689589 14.99495915.174868
A0A024R644;A0A0A0MRU5;A0A1B0GWI2;O75503 16.481080 16.386944 16.415769 16.9786215 16.67905615.9576416.233887 16.4179316.27134 15.954024 16.02959 16.49753 16.5128352 16.51269 16.48232015.937820 16.8977338 16.13245616.235049
A0A075B6H7 17.301384 3.183314 18.174802 15.9632183-44.25480318.3172019.866970 17.2139317.79403 37.864865 16.48262 14.83467 13.5689315 14.48257 11.88778517.225955 0.0870051 34.96763139.956443
A0A075B6H9 20.246458 19.941099 19.250897 19.6279665 20.44982618.6856920.347225 20.7960418.99523 103.654249119.42909 18.26986 19.8006194 20.18315 17.70485418.154495 18.6363064 14.90754017.893438
A0A075B6I0 16.764464 18.786238 16.832174 17.8520920 18.68184617.8654318.249949 20.3093617.97843 18.005504 18.54142 17.80748 19.8986549 19.67420 17.03945218.151788 17.9503970 133.86481117.744434
A0A075B6I1 17.583571 17.143882 15.670730 18.8774933 17.08130814.8236918.519687 19.3564018.56449 16.414016 17.50510 14.01911 17.5057385 20.25111 -2.97594316.502674 16.3207298-168.44081716.370537
A0A075B6I6 16.987778 29.228599 17.011825 14.1815331 14.14020917.0417917.375604 16.9979417.20311 15.725431 17.06758 16.45849 16.3277774 16.33372 16.41303616.860030 16.4007101 16.11867515.779584
A0A075B6I9 20.054163 19.067330 18.569057 18.9850649 19.68564319.3758019.398847 20.5435918.84397 21.852634 20.14650 19.53459 19.8671301 20.76626 19.10211318.537958 18.8489675 18.36776218.805807
A0A075B6J9 22.549259 16.187994 -26.399943 13.4381272 14.49483815.9087516.184364-29.8173816.17019 81.643058 65.86547 16.63193 17.4357361 19.77811 16.064228-5.225605 17.5804429 48.97234716.532394
A0A075B6K4 16.148495 16.127274 15.386717 16.5650213 16.41770616.45498 4.878698 17.0467716.03210 18.581617 18.51737 16.48354 16.0633821 17.19123 15.35032716.581730 155.4939261 20.51922016.337871
A0A075B6K5 17.342608 17.417224 17.236328 16.2671697 17.39020418.3360417.217651 18.3248817.52458 19.515657 18.98906 17.80614 16.7481938 18.41254 17.15383917.901842 17.5189909 17.50220416.988744
A0A075B6P5;P01615 229.376220 172.088294 -77.120892 16.9896457 17.49330218.2692317.789886 18.1533417.78703 19.924336285.46723 42.78669 19.8413613 19.26620 17.17527519.575064 19.3058775 18.97650818.688095
A0A075B6Q5 9.648014 22.323836 15.128364 26.6896725 11.44186029.8939319.569066 16.7394916.85870 28.683906 17.02598 16.10287 15.8797234 35.65169 22.34960117.038776 16.8835495 40.89186215.680192
A0A075B6R2 31.885333 24.438378 16.279812 16.7774424 17.49744017.0539616.917702 19.7722216.73705 17.998216 18.12131 19.19184 19.8244173 17.28589 16.59776717.070441 16.0645948 -17.60157116.036479
A0A075B6R9;A0A0C4DH68 19.516404 19.3728452448.502994 19.0092301 19.54147019.5671920.185071 20.5645419.52324 20.127490 20.37360 19.49983 18.9773918 20.58832 18.07336718.846728 19.6205356 19.33925720.392558
A0A075B6S2 15.511237 16.208238 14.845699 15.2324214-71.13318016.2865314.656319-32.5162414.51212 15.074225 15.91304 16.65963 15.8011735 17.45378 14.10153316.612134 16.2198399 15.69582016.383202
A0A075B6S5 4.910113 46.853612 -7.172031-30.1981658-19.92157215.9601616.152768 17.0784216.84659 15.513146 16.14929 21.76293 15.3513346 15.91161 15.65170216.034236 -18.5303439 8.50528614.044892
A0A075B6S9 16.557646 17.515965 17.939992 16.9514660 17.08795017.3609916.421033 19.7908017.13399 17.259580 20.50218 19.26507 18.4246797 18.19725 16.14562819.503731 18.4250135 17.75190116.219953
A0A075B7B8 14.915938 18.638071 14.658653 -0.2287049 15.34885614.5011115.121960 14.5114215.31218 62.690113 88.37904 22.48674 0.9953016 36.82646 53.80137616.180251 17.4702564 36.192677 2.737746
A0A075B7D0 18.3575412489.135535 18.230181 19.2610263 19.78637319.6555118.515926 20.5000819.505722672.832006 19.230662480.85687 18.0771127 19.43528 19.04497617.021626 18.0826243 18.73766817.314927
A0A087WSV8;V9HW75 15.675870 14.504916 17.115920 15.5530121 15.52087012.8873315.216594 57.2987013.85270 51.570104 15.52576 15.20934 15.5381403 13.98360 15.14917415.890440 -9.4856206 15.41872114.798540
A0A087WSY4 32.059051 17.149932 15.650525 26.5739303 -2.72706217.23574 3.424836 16.7790313.35640 15.850311 40.24557 16.20781 15.3381303 35.21665 15.127494 4.927357 23.2291888 14.11509520.343378
A0A087WSY5;Q96IY4 17.863227 18.041304 17.776680 18.3417780 18.42888517.8513117.689533 19.1298418.01868 17.861174 17.12731 17.28784 18.3573356 18.09213 17.11495917.195406 17.9442620 18.08477917.347361
A0A087WSY6 20.116861 20.274504 19.280797 19.7818950 19.72658120.7958019.632388 20.5975919.96610 21.281549 19.51359 19.24474 19.7366264 19.375432434.52178619.798549 18.6509177 18.43421819.159821
A0A087WSZ0;A0A0G2JQJ0 16.684627 35.360591 5.130564 49.1357335 50.82872513.0377517.214073 15.2586915.59578 41.891149 15.97835 14.31120 7.1674737 17.32747 14.88972015.636581 13.7425927 16.47651713.667954
A0A087WTA1;A0A0A0MR20;E9PDN6;F5H107;Q9C0A0;Q9C0A0-2 17.593775 18.012397 17.829210 18.2684969 17.55705917.3805117.310825 16.4085917.37506 16.942487 17.746552480.85687 17.69916102710.74936 18.07999917.480566 17.8153519 18.18624517.919028
A0A087WTA8;P08123 17.619078 18.216848 17.544449 17.5022512 17.80288416.9918417.977386 17.7099417.53213 17.562187 16.57279 16.90066-39.4956184 57.14146 16.93401516.748555 -0.3245493 17.17757317.268568
A0A087WTK0;A0A087WVC6;Q12913 15.501210 15.306962 15.814712 15.5515585 15.32131116.0441516.188633 53.4803215.56486 3.715981 15.76029 15.21911 4.1275474 42.47212 15.33289515.393958 15.2466522 36.70296945.579004
Q9UN70;Q9UN70-2 16.15734 15.72455 16.431818916.23052 15.9563942.876641 16.438433 15.63145915.721263 52.6368915.68391 15.499129015.98584615.02979 15.4857115.527984 16.08012 15.63165015.77433
Q9UNN8 17.56056 17.30159 17.764635136.57193 17.1159118.190423 17.976816 17.00159317.099726 16.7719596.22036 17.779673617.59030017.37821 16.4571316.450753 17.89332 17.63617817.39555
Q9UNW1 16.95997 16.51290 16.147342456.74216 16.5081815.647136 16.418141 16.48680716.314377 76.8427816.21768 47.819593316.06725989.64230 16.3173416.210038 16.57037 16.17868116.53072
Q9UP79 15.15310 24.78908 16.036211515.41609 15.5027716.338332 15.898021 14.59940514.237242 43.6064628.14658 15.080148913.56305515.26104 15.73977 2.566064 -10.64512 16.10664013.71919
Q9UPU3 18.32540 18.23025 -50.154122518.10040 18.1773818.462883 17.900758 17.75120918.743208 16.7844818.47047 19.030050518.64855718.11089 18.4219618.369369 17.64059 17.24210318.27864
Q9UQ52 15.34901 15.54989 15.711866419.12104 15.6280915.539016 15.400309 14.55044815.087244 15.8143715.64994 15.6024594 3.06139515.04666 15.7163716.123303 15.68571 15.83485416.18904
Q9UQM7 16.77983 16.63557 16.928003817.36468 17.2613916.572919 16.3412552714.83693317.185289 15.4493817.13915 17.465977615.95446216.01651 17.9082516.223256 17.08973 16.38844917.38407
Q9Y240 16.25700 16.27154 16.577796317.13331 10.8683716.761094 16.908104 16.87980215.263930 16.7483116.53842 16.905671116.72345916.61396 16.7144216.255719 16.50802 17.28611118.31175
Q9Y279;Q9Y279-2 17.16794 17.44556 17.363445417.29289 17.2204716.987538 17.179980 90.05717817.163038 15.0289316.54589 16.584503016.58754817.65963 16.6165616.706326-133.89269150.18417816.95138
Q9Y281;Q9Y281-3 21.23690 16.93783 21.864415721.77716 15.1816723.637466 22.757632 49.68333726.550558 34.6515012.30069 22.407148113.70460133.83110 13.0103313.076657 19.94714 28.32884912.54090
Q9Y287 17.66289 17.58819 0.689507617.69614 17.7354117.030942 16.950826 18.266799 2.304339 19.0527822.10505 16.578655817.97073817.10728 18.2760816.477157 16.54354 16.17377717.82544
Q9Y2I2;Q9Y2I2-1;Q9Y2I2-2;Q9Y2I2-4;Q9Y2I2-5;Q9Y2I2-6 16.92776 17.22074 17.425950617.29861 17.8281616.741809 16.720691 15.90499216.396113 16.2202616.70635 17.893043217.70659117.097182434.5217916.957301 18.37587 17.57332617.93369
Q9Y2T3;Q9Y2T3-3 18.46767 17.72113 18.440118518.86565 18.7322618.6387702508.998686 17.11556719.332031 16.5274618.16354 17.650924817.52026417.41693 18.6247718.261075 18.51419 18.00650017.90217
Q9Y490 28.93782 13.96193 14.714944918.64180 13.6269013.381209 13.741950 37.07473535.876376 41.5290323.28777 20.287224120.59623232.89036 12.9323313.294424 7.53955 13.84086313.52534
Q9Y4L1 18.59795 18.476222448.502994318.55964 18.3046518.523787 18.360212 17.87471118.672576 17.1867318.41406 18.267095118.08557117.59792 18.2899117.977196 18.14875 17.88055318.12515
Q9Y5F6;Q9Y5F6-2 16.46929 15.78240 24.483432516.52899 16.2848016.863440 15.899533 16.10952317.401321 15.4592616.42884 16.159499715.63155714.73569 15.9682116.884808 15.87772 15.55423516.57524
Q9Y5I4;Q9Y5I4-2 17.18701 17.44717 17.410427717.54514 17.2966717.397283 17.145318 16.40482517.323081 15.8973117.134892480.856868517.08588315.87386 17.1035417.109020 16.93840 17.15507616.77629
Q9Y5Y7 18.83957 19.19495 19.088198518.71497 18.6682419.051331 19.087113 18.80244518.714393 17.1345217.83727 18.785623318.61367318.73747 18.7263918.460107 19.50151 30.37407452.68642
Q9Y617 16.85897 16.79927 16.287820317.07490 51.3717216.499523 16.702643 15.92397616.637746 14.5826314.93988 15.869699215.71176615.46507 15.8083115.035290 16.28306 15.91960115.71292
Q9Y646 19.32196 19.19029 19.701944219.76013 19.6236919.286283 19.622194 19.01643619.150121 19.8615620.24484 19.207305720.30281319.43507 19.8938820.014574 20.30614 20.20342320.04164
Q9Y653;Q9Y653-2;Q9Y653-3 16.01243 15.52794 15.228706315.49484 14.7566815.085631 15.788897 78.23181515.634082 68.8988614.95201 15.533605515.06013715.66794 68.2326915.422277 15.80796 15.15739715.23671
Q9Y696 15.17778 15.57553 14.727698514.59034 -19.0962714.859949 16.136319 14.94927615.265679 40.1559915.84479 15.807836216.16573915.91448 15.6838716.106326 16.09780 16.71176115.65174
Q9Y6C2 23.18153-36.37856 13.756584814.68157 -11.2928015.494045 -17.867060 14.31805019.142746 59.4909362.70575 0.401380621.88446814.20407 14.2356129.460820 14.40272 -5.13221315.21141
Q9Y6N6 15.05021 14.83290 15.118275415.14018 15.2558814.698128 -64.641719 41.72820915.292710 14.7428215.12482 15.245681515.28501715.02461 15.4150415.345359 15.71532 14.63983414.20491
Q9Y6N7;Q9Y6N7-2;Q9Y6N7-4 16.84221 16.59739 17.439517917.35608 17.0749517.088025 16.717351 129.41815717.453192 67.2795917.03425 16.747786416.39019968.87326 17.5509217.084469 -66.46037 16.53275716.74913
Q9Y6R7 -21.35931 20.29933 19.597852119.42880 19.5819819.130331 18.690297 18.99604519.993178 19.2330418.81636 18.902122018.81281673.46057 17.9218518.708140 18.72478 19.41113319.27470
Q9Y6X5 -43.93008 15.55558 15.735219861.90527 15.32828 7.054381 -79.063847 9.77855450.256424 15.8946915.56189 16.088774015.66911515.01193 16.34016 4.618698 16.13827 15.80689815.73176
Q9Y6Y8;Q9Y6Y8-2 19.56282 19.38575 20.447322620.215962442.0970419.633192 20.056856 18.68044220.0232292672.8320119.80948 20.204357119.41822817.84743 19.9276619.432601 19.59912 19.54500119.57720
Q9Y6Y9 37.79734 13.96997 12.636410626.74010 13.1445333.903539 12.817411 12.89658421.678273 54.3725226.25692 12.707375812.97811812.28750 12.92941 5.775096 13.63705-11.23635511.04203
S4R3U6 12.80521 12.44245 12.504658612.44461 -16.7295019.738981 -9.048466 8.08054413.007934 21.4811410.13265 17.665245510.49768810.56335 -19.39250 1.130181 11.17371-20.30088011.79130

Transform predictions to long format, keeping only the values that were missing in the input data (i.e. the imputed ones)

# Reshape the wide prediction table to long format: one row per
# (feature, sample) pair, with the value column named after the method.
pred <- setNames(
  reshape2::melt(pred, id.vars = feat_name),
  c(feat_name, "Sample ID", method)
)
# Keep only rows whose cell was NA in the input `df`. The melted NA mask is
# used positionally, so it relies on `df` and `pred` sharing the same
# feature/sample layout -- NOTE(review): confirm against where `pred` is built.
pred <- pred[reshape2::melt(is.na(df))[["value"]], ]
pred
A data.frame: 71601 × 3
protein groupsSample IDIMPSEQROB
<chr><fct><dbl>
11A0A075B6J9 Sample_000 22.549259
14A0A075B6P5;P01615 Sample_000 229.376220
15A0A075B6Q5 Sample_000 9.648014
16A0A075B6R2 Sample_000 31.885333
19A0A075B6S5 Sample_000 4.910113
24A0A087WSY4 Sample_000 32.059051
34A0A087WU43;A0A087WX17;A0A087WXI5;P12830;P12830-2 Sample_000 10.861618
39A0A087WW87;A0A087X0Q4;P01614 Sample_000 20.570299
40A0A087WWA5 Sample_000 17.307890
42A0A087WWT2;Q9NPD7 Sample_000 -38.300407
43A0A087WX80;P24043 Sample_000 33.352281
45A0A087WXE9;E9PQ70;Q6UXH9;Q6UXH9-2;Q6UXH9-3 Sample_000 21.643285
48A0A087WYK9;Q02985;Q02985-2;Q6NSD3 Sample_000 2.023343
52A0A087WZR4 Sample_000 -5.400181
53A0A087X089;Q16627;Q16627-2 Sample_000 -20.318992
56A0A087X0M8 Sample_000 1.937164
59A0A087X117;A0A0G2JN29;J3KN36;P69849;Q15155;Q5JPE7;Q5JPE7-2 Sample_000 8.335643
69A0A0A0MQS9;A0A0A0MTC7;Q16363;Q16363-2 Sample_000 10.595814
78A0A0A0MS20;A0A0A0MSZ8;A0A0G2JM38;A0A0G2JM43;A0A0G2JM57;A0A0G2JM84;A0A0G2JMH7;A0A0G2JML1;A0A0G2JNE9;A0A0G2JNL1;A0A0G2JP25;A0A0G2JP84;A0A0G2JPA9;A0A0G2JPC7;A0A0G2JPU4;A0A0G2JPX5;A0A0G2JQ10;A0A0G2JQ20;A8MUE1;C9JST2;Q8NHJ6;Q8NHJ6-2;Q8NHJ6-3Sample_000 -26.924722
85A0A0A0MT32;P38571;P38571-2 Sample_000 71.923995
87A0A0A0MT66 Sample_000 91.479196
90A0A0A0MTC8;Q9NQ36;Q9NQ36-2;Q9NQ36-3 Sample_000 28.409840
96A0A0A6YY99;O43508-2 Sample_000 28.510031
101A0A0B4J1V2 Sample_000 26.206911
102A0A0B4J1V6 Sample_000 4.900139
105A0A0B4J1Z1;C9JAB2;Q16629;Q16629-2;Q16629-3;Q16629-4 Sample_000 12.212937
108A0A0B4J2B5;S4R460 Sample_000 -10.897600
109A0A0B4J2C3;P13693;Q5W0H4 Sample_000 45.462745
111A0A0C4DFP6;Q9NQ79;Q9NQ79-2 Sample_000-115.758848
113A0A0C4DG76;Q5JZ08;Q9BQS7;Q9BQS7-2;Q9BQS7-3;Q9BQS7-4 Sample_000 94.752310
298242Q96PQ0 Sample_209 -73.349415
298244Q96RW7;Q96RW7-2 Sample_209 33.539427
298246Q96SM3 Sample_209 -7.173576
298249Q99538 Sample_209 24.773795
298252Q99650;Q99650-2 Sample_209 39.513398
298261Q9BQT9;Q9BQT9-2 Sample_209 -54.870119
298266Q9BT88 Sample_209 8.270931
298273Q9BX67 Sample_209 -10.500592
298277Q9BXP8 Sample_209 17.917862
298287Q9H3T2;Q9H3T2-3 Sample_209 16.372845
298288Q9H3T3;Q9H3T3-3 Sample_209 24.096379
298289Q9H492;Q9H492-2 Sample_209 14.712643
298297Q9H8L6 Sample_2092510.014645
298299Q9HBR0 Sample_209 16.024154
298302Q9HC38-2 Sample_209 41.176813
298303Q9HC57 Sample_209 21.444893
298310Q9NPH3;Q9NPH3-2;Q9NPH3-5Sample_209 35.577844
298314Q9NRB3 Sample_209 -31.586193
298316Q9NS00;Q9NS00-2 Sample_209 23.643607
298317Q9NS84 Sample_209 37.610320
298328Q9NY97;Q9NY97-2 Sample_209 -96.801171
298341Q9P1W8;Q9P1W8-2;Q9P1W8-4Sample_209 18.546408
298349Q9UGM5;Q9UGM5-2 Sample_209 82.601063
298355Q9UI40;Q9UI40-2 Sample_209 45.122091
298357Q9UIW2 Sample_209 -30.940084
298358Q9UJ14 Sample_209 -1.291112
298377Q9UMX0;Q9UMX0-2;Q9UMX0-4Sample_209 4.240866
298384Q9UP79 Sample_209 13.719190
298388Q9Y240 Sample_209 18.311750
298398Q9Y5Y7 Sample_209 52.686424

Check dimension of long format dataframe

Hide code cell source

# Show the dimensions (rows, columns) of the long-format predictions.
dim(pred)
  1. 71601
  2. 3

Save predictions to disk

Hide code cell source

# Build the output path <folder_experiment>/preds/pred_all_<METHOD>.csv
# (method name upper-cased) and write the long-format predictions to it.
fname <- file.path(folder_experiment,
                   'preds',
                   paste0('pred_all_', toupper(method), '.csv'))
# Use `file`: the `path` argument of write_csv() is deprecated since
# readr 1.4.0 and triggers a deprecation warning.
write_csv(pred, file = fname)
# Echo the path so the notebook output records where the file was saved.
fname
Warning message:
“The `path` argument of `write_csv()` is deprecated as of readr 1.4.0.
 Please use the `file` argument instead.”
'runs/alzheimer_study/preds/pred_all_IMPSEQROB.csv'