Number of rows and columns in the sample annotation: 100, 19

Sample annotation

# Interactive view of the complete sample annotation; `filter = 'top'` asks DT
# to place the per-column filter controls above the table.
DT::datatable(data = sa, filter = 'top')

Quality control checks

# Flag rows that are exact copies of an earlier row. The logical mask is
# computed once and reused for both the test and the row subset.
dup_row_mask <- duplicated(sa)
if (any(dup_row_mask)) {
  print("The sample annotation has the following duplicated rows. Remove them.")
  # Last expression of the `if` body, so the offending rows are shown.
  sa[dup_row_mask]
}

Check for RNA_IDs without a value

# Every row must carry an RNA_ID; report the rows where it is NA.
if (sa[, anyNA(RNA_ID)]) {
  print("The sample annotation has some non-existent RNA_IDs. Fill them.")
  # Last expression of the `if` body, so the incomplete rows are shown.
  sa[which(is.na(RNA_ID))]
}

Check for nonexistent BAM files

# Verify that every RNA_BAM_FILE path points to an existing file.
# `aux1` is a helper column added to `sa` by reference: TRUE when the file
# exists, FALSE otherwise (NA paths count as non-existing).
# as.character() keeps file.exists() from failing with "invalid 'file'
# argument" when the whole column was read as logical NA.
sa[, aux1 := file.exists(as.character(RNA_BAM_FILE))]
if (any(!sa$aux1)) {
  print('The following BAM files do not exist: ')
  # Last expression of the `if` body, so the table of missing files is shown.
  DT::datatable(sa[aux1 == FALSE])
}

Check for nonexistent VCF files

# Verify that every declared DNA_VCF_FILE path points to an existing file.
# Only rows that actually declare a VCF (neither NA nor an empty string) are
# checked, so samples without a VCF are not reported as "missing files" when
# other samples do have one. The check is skipped when the column is absent or
# no sample declares a VCF.
if ("DNA_VCF_FILE" %in% colnames(sa)) {
  has_vcf <- sa[, !is.na(DNA_VCF_FILE) & DNA_VCF_FILE != ""]
  if (any(has_vcf)) {
    # `aux1` is overwritten by reference: FALSE only for rows whose declared
    # VCF path does not exist on disk.
    sa[, aux1 := !has_vcf | file.exists(as.character(DNA_VCF_FILE))]
    if (any(!sa$aux1)) {
      print('The following VCF files do not exist: ')
      # Last expression of the `if` chain, so the table is shown.
      DT::datatable(sa[aux1 == FALSE])
    }
  }
}

Check for RNA_IDs with more than one RNA_BAM_FILE

# Each RNA_ID must map to exactly one RNA_BAM_FILE.
# Collapse the annotation to distinct (RNA_ID, RNA_BAM_FILE) pairs; an RNA_ID
# that still occurs more than once is linked to several BAM files.
id_bam_pairs <- unique(sa[, .(RNA_ID, RNA_BAM_FILE)])
ambiguous_ids <- unique(id_bam_pairs$RNA_ID[duplicated(id_bam_pairs$RNA_ID)])
if (length(ambiguous_ids) > 0) {
  print('The following RNA_IDs and RNA_BAM_FILEs do not have a 1:1 match. Correct them.')
  # DT::datatable() needs 2-dimensional data, so show the conflicting pairs
  # themselves rather than the logical vector returned by duplicated().
  DT::datatable(id_bam_pairs[RNA_ID %in% ambiguous_ids])
}

Barplot with DROP groups

# Count how many samples belong to each DROP group and plot the counts.
# A sample may list several groups separated by commas, so the DROP_GROUP
# strings of the distinct (RNA_ID, DROP_GROUP) pairs are split before tallying.
sample_groups <- unique(sa[, .(RNA_ID, DROP_GROUP)])
group_labels <- unlist(strsplit(sample_groups$DROP_GROUP, ','))
group_sizes <- table(group_labels)
barplot(group_sizes, xlab = 'DROP groups', ylab = 'Number of samples')

# Obtain genes that overlap with HPO terms
IyctLS0KIycgdGl0bGU6IFNhbXBsZSBBbm5vdGF0aW9uIE92ZXJ2aWV3CiMnIGF1dGhvcjoKIycgd2I6CiMnICBsb2c6CiMnICAgLSBzbmFrZW1ha2U6ICdgc20gc3RyKHRtcF9kaXIgLyAiU2FtcGxlQW5ub3RhdGlvbi5SZHMiKWAnCiMnICBwYXJhbXM6CiMnICAgLSBocG9GaWxlOiAnYHNtIGNmZy5nZXQoImhwb0ZpbGUiKWAnCiMnICBpbnB1dDogCiMnICAgLSBzYW1wbGVBbm5vdGF0aW9uOiAnYHNtIHNhLmZpbGVgJwojJyAgb3V0cHV0OgojJyAgIC0gaHBvT3ZlcmxhcDogJ2BzbSB0b3VjaChjZmcuZ2V0UHJvY2Vzc2VkRGF0YURpcigpICsgIi9zYW1wbGVfYW5uby9nZW5lc19vdmVybGFwcGluZ19IUE9fdGVybXMudHN2IilgJwojJyBvdXRwdXQ6CiMnICAgaHRtbF9kb2N1bWVudDoKIycgICAgY29kZV9mb2xkaW5nOiBoaWRlCiMnICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKIyctLS0KCiMrZWNobz1GCnNhdmVSRFMoc25ha2VtYWtlLCBzbmFrZW1ha2VAbG9nJHNuYWtlbWFrZSkKCnN1cHByZXNzUGFja2FnZVN0YXJ0dXBNZXNzYWdlcyh7CiAgbGlicmFyeShkYXRhLnRhYmxlKQogIGxpYnJhcnkobWFncml0dHIpCiAgbGlicmFyeSh0aWR5cikKfSkKCnNhIDwtIGZyZWFkKHNuYWtlbWFrZUBpbnB1dCRzYW1wbGVBbm5vdGF0aW9uKQoKIycKIycgTnVtYmVyIG9mIHJvd3MgYW5kIGNvbHVtbnMgaW4gdGhlIHNhbXBsZSBhbm5vdGF0aW9uOiBgciBkaW0oc2EpYAoKIycKIycgIyMgU2FtcGxlIGFubm90YXRpb24KRFQ6OmRhdGF0YWJsZShzYSwgZmlsdGVyID0gJ3RvcCcpCgojJyAjIyBRdWFsaXR5IGNvbnRyb2wgY2hlY2tzCgojIGNoZWNrIGZvciBkdXBsaWNhdGVkIHJvd3MKaWYoc3VtKGR1cGxpY2F0ZWQoc2EpKSA+IDApewogIHByaW50KCJUaGUgc2FtcGxlIGFubm90YXRpb24gaGFzIHRoZSBmb2xsb3dpbmcgZHVwbGljYXRlZCByb3dzLiBSZW1vdmUgdGhlbS4iKQogIHNhW2R1cGxpY2F0ZWQoc2EpXQp9CgojJyBDaGVjayBmb3IgUk5BX0lEcyB3aXRob3V0IGEgdmFsdWUKaWYobnJvdyhzYVtpcy5uYShSTkFfSUQpXSkgPiAwKXsKICBwcmludCgiVGhlIHNhbXBsZSBhbm5vdGF0aW9uIGhhcyBzb21lIG5vbi1leGlzdGVudCBSTkFfSURzLiBGaWxsIHRoZW0uIikKICBzYVtpcy5uYShSTkFfSUQpXQp9CgoKIycgQ2hlY2sgZm9yIG5vbmV4aXN0ZW50IEJBTSBmaWxlcwpzYVssIGF1eDEgOj0gZmlsZS5leGlzdHMoUk5BX0JBTV9GSUxFKV0KaWYoYW55KHNhJGF1eDEgPT0gRikpewogIHByaW50KCdUaGUgZm9sbG93aW5nIEJBTSBmaWxlcyBkbyBub3QgZXhpc3Q6ICcpCiAgRFQ6OmRhdGF0YWJsZShzYVthdXgxID09IEZdKQp9CgojJyBDaGVjayBmb3Igbm9uZXhpc3RlbnQgVkNGIGZpbGVzCmlmKCEgYWxsKHNhWyxpcy5uYShETkFfVkNGX0ZJTEUpXSkpewogIHNhWywgYXV4MSA6PSBmaWxlLmV4aXN0cyhETkFfVkNGX0ZJTEUpXQogIGlmKGFueShzYSRhdXgxID09IEYpKXsKICAgIHByaW50KCdUaGUgZm9sbG93aW5nIFZDRiBmaWxlcyBkbyBu
b3QgZXhpc3Q6ICcpCiAgICBEVDo6ZGF0YXRhYmxlKHNhW2F1eDEgPT0gRl0pCiAgfQp9CgojJyBDaGVjayBmb3IgUk5BX0lEcyB3aXRoIG1vcmUgdGhhbiBvbmUgUk5BX0JBTV9GSUxFCmlmKHN1bShkdXBsaWNhdGVkKHVuaXF1ZShzYVssLihSTkFfSUQsIFJOQV9CQU1fRklMRSldKSRSTkFfSUQpKSA+IDApewogIHByaW50KCdUaGUgZm9sbG93aW5nIFJOQV9JRHMgYW5kIFJOQV9CQU1fRklMRXMgZG8gbm90IGhhdmUgYSAxOjEgbWF0Y2guIENvcnJlY3QgdGhlbS4nKQogIERUOjpkYXRhdGFibGUoZHVwbGljYXRlZCh1bmlxdWUoc2FbLC4oUk5BX0lELCBSTkFfQkFNX0ZJTEUpXSkkUk5BX0lEKSkKfQoKIycgIyMgQmFycGxvdCB3aXRoIERST1AgZ3JvdXBzCnVuaXF1ZShzYVssLihSTkFfSUQsIERST1BfR1JPVVApXSkkRFJPUF9HUk9VUCAlPiUgc3Ryc3BsaXQoJywnKSAlPiUgdW5saXN0ICU+JQogIHRhYmxlICU+JSBiYXJwbG90KHhsYWIgPSAnRFJPUCBncm91cHMnLCB5bGFiID0gJ051bWJlciBvZiBzYW1wbGVzJykKCiMgT2J0YWluIGdlbmVzIHRoYXQgb3ZlcmxhcCB3aXRoIEhQTyB0ZXJtcwojK2VjaG89RgppZighaXMubnVsbChzYSRIUE9fVEVSTVMpICYgIWFsbChpcy5uYShzYSRIUE9fVEVSTVMpKSAmICEgYWxsKHNhJEhQT19URVJNUyA9PSAnJykpewogIHNhMiA8LSBzYVssIC5TRFsxXSwgYnkgPSBSTkFfSURdCiAgCiAgZmlsZW5hbWUgPC0gaWZlbHNlKGlzLm51bGwoc25ha2VtYWtlQHBhcmFtcyRocG9fZmlsZSksIAogICAgICAgICAgICAgICAgICAgICAnaHR0cHM6Ly93d3cuY21tLmluLnR1bS5kZS9wdWJsaWMvcGFwZXIvZHJvcF9hbmFseXNpcy9yZXNvdXJjZS9ocG9fZ2VuZXMudHN2Lmd6JywKICAgICAgICAgICAgICAgICAgICAgaHBvX2ZpbGUpCiAgaHBvX2R0IDwtIGZyZWFkKGZpbGVuYW1lKQogIAogIHNhcHBseSgxOm5yb3coc2EyKSwgZnVuY3Rpb24oaSl7CiAgICBocG9zIDwtIHN0cnNwbGl0KHNhMltpLCBIUE9fVEVSTVNdLCBzcGxpdCA9ICcsJykgJT4lIHVubGlzdAogICAgZ2VuZXMgPC0gcGFzdGUoc29ydChocG9fZHRbSFBPX2lkICVpbiUgaHBvcywgaGduY1N5bWJvbF0pLCBjb2xsYXBzZSA9ICcsJykKICAgIHNldChzYTIsIGksICdIUE9fbWF0Y2hpbmdfZ2VuZXMnLCB2YWx1ZSA9IGdlbmVzKQogIH0pICU+JSBpbnZpc2libGUoKSAgIyBkb24ndCBwcmludCByZXN1bHQKICBzYTIgPC0gc2EyWywgLihSTkFfSUQsIEhQT19tYXRjaGluZ19nZW5lcyldCiAgCiAgZndyaXRlKHNhMiwgc25ha2VtYWtlQG91dHB1dCRocG9PdmVybGFwLAogICAgICAgICBuYSA9IE5BLCBzZXAgPSAiXHQiLCByb3cubmFtZXMgPSBGLCBxdW90ZSA9IEYpCiAgfQoK