Combining Multiple Single cell RNA-seq Datasets: Part III

I combined the individual expression data files from the Fan et al. (2018) study to generate a Seurat project for downstream analysis.

Read and combine all gene expression files.

# Locate every per-sample TPM table. `pattern` is a regular expression (not a
# shell glob), so match the literal "_TPM.txt" suffix explicitly.
files <- list.files(path = "~/zhen/data/iPSC_backup/raw_data/",
                    pattern = "_TPM\\.txt$", full.names = TRUE)
# Fail early with a clear message instead of an obscure read.table() error.
if (length(files) == 0) {
  stop("No *_TPM.txt files found in ~/zhen/data/iPSC_backup/raw_data/",
       call. = FALSE)
}

# Read every table once; the first column supplies the row names.
tables <- lapply(files, read.table, header = TRUE, row.names = 1,
                 stringsAsFactors = FALSE)

# The row order of the first file defines the row order of the combined table.
rowNames <- rownames(tables[[1]])

# Re-index each table to that row order. `drop = FALSE` keeps a single-column
# table as a data frame so its column name survives the cbind below.
tables <- lapply(tables, function(tbl) tbl[rowNames, , drop = FALSE])

# Bind all columns in one call rather than growing `df` inside a loop.
df <- do.call(cbind, tables)

Prepare metadata

Take a look at the distribution of aligned reads

# Summary statistics of the log1p-transformed aligned read counts.
# `avg` and `sd` are reused later as the basis of the filtering threshold.
avg <- mean(log1p(meta$Aligned_Reads))
sd <- sd(log1p(meta$Aligned_Reads))

# Histogram of the same log1p-transformed values, using 100 breaks.
hist(log1p(meta$Aligned_Reads), breaks = 100)

# Dashed reference lines: mean (black), mean - 1 SD (green), mean - 3 SD (blue).
abline(v = avg - c(0, 1, 3) * sd, lty = 2,
       col = c("black", "green", "blue"))

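Create the Seurat object, keeping only cells whose log-transformed aligned read count is above the mean minus three standard deviations.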

# Reorder the metadata so that its rows line up with the columns of `df`.
meta <- meta[colnames(df),]

# Keep cells whose log1p aligned-read count is above mean - 3 SD.
keep <- log1p(meta$Aligned_Reads) > avg - 3*sd

# Columns of `df` with no matching metadata row (or an NA read count) produce
# NA here, which would make the `df[, keep]` subset fail. Drop them explicitly
# and say so.
if (anyNA(keep)) {
  warning(sum(is.na(keep)),
          " cell(s) could not be compared against the read-depth threshold ",
          "(missing metadata or NA values) and were dropped", call. = FALSE)
  keep[is.na(keep)] <- FALSE
}

# NOTE(review): `normalization.method` looks like an argument of the Seurat v2
# constructor; confirm against the installed Seurat version.
df0.fan <- CreateSeuratObject(df[,keep],meta.data = meta[keep,],
                              project = "Fan",
                              normalization.method = "LogNormalize")

# Store the aligned read count under the `nUMI` column.
df0.fan@meta.data$nUMI <- df0.fan@meta.data$Aligned_Reads
# Preserve the original identity in `Region` before overwriting `orig.ident`
# with the dataset label.
df0.fan@meta.data$Region <- df0.fan@meta.data$orig.ident
df0.fan@meta.data$orig.ident <- "Fan"
# Drop metadata columns 4 through 8 by position.
# NOTE(review): positional removal depends on the metadata column order;
# confirm which columns these are.
df0.fan@meta.data <- df0.fan@meta.data[,-c(4:8)]
LS0tCnRpdGxlOiAiUHJvY2VzcyBGYW4gZXQgYWwuIDIwMTggZGF0YXNldCIKZGF0ZTogMTEtMjctMjAxOApvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpJIGNvbWJpbmVkIGluZGl2aWR1YWwgZXhwcmVzc2lvbiBkYXRhIGZpbGVzIGZyb20gRmFuIGV0IGFsLiwgMjAxOCBzdHVkeSB0byBnZW5lcmF0ZSBhIFNldXJhdCBwcm9qZWN0IGZvciBkb3duc3RyZWFtIGFuYWx5c2lzLgoKYGBge3Igc2V0dXAsIGVjaG89RkFMU0V9CmtuaXRyOjpvcHRzX2tuaXQkc2V0KHJvb3QuZGlyID0gIn4vemhlbi9kYXRhL2hPcmcvIikKbGlicmFyeShTZXVyYXQpCmBgYAoKUmVhZCBhbmQgY29tYmluZSBhbGwgZ2VuZSBleHByZXNzaW9uIGZpbGVzLgpgYGB7cn0KZmlsZXMgPC0gbGlzdC5maWxlcyhwYXRoID0gIn4vemhlbi9kYXRhL2lQU0NfYmFja3VwL3Jhd19kYXRhLyIsIHBhdHRlcm4gPSAiKl9UUE0udHh0IixmdWxsLm5hbWVzID0gVCkKZGYgPC0gcmVhZC50YWJsZShmaWxlc1sxXSwgaGVhZGVyID0gVCwgcm93Lm5hbWVzID0gMSwgc3RyaW5nc0FzRmFjdG9ycyA9IEYpCnJvd05hbWVzIDwtIHJvd25hbWVzKGRmKQpmb3IoZmlsZSBpbiBmaWxlc1stMV0pewogIHRlbXAgPC0gcmVhZC50YWJsZShmaWxlLCBoZWFkZXIgPSBULCByb3cubmFtZXMgPSAxLCBzdHJpbmdzQXNGYWN0b3JzID0gRikKICB0ZW1wIDwtIHRlbXBbcm93TmFtZXMsXQogIGRmIDwtIGNiaW5kKGRmLCB0ZW1wKQp9CmBgYAoKUHJlcGFyZSBtZXRhZGF0YQpgYGB7cn0KbWV0YSA8LSByZWFkLmNzdigifi96aGVuL2RhdGEvaVBTQ19iYWNrdXAvcmF3X2RhdGEvR1NFMTAzNzIzX1JlZ2lvbl9TYW1wbGVfQmFyY29kZV9JbmZvcm1hdGlvbi5jc3YiLCBoZWFkZXIgPSBUKQpyb3duYW1lcyhtZXRhKSA8LSBtZXRhWywxXQpgYGAKClRha2UgYSBsb29rIGF0IHRoZSBkaXN0cmlidXRpb24gb2YgYWxpZ25lZCByZWFkcwpgYGB7ciBmaWcuaGVpZ2h0PSA3LCBmaWcud2lkdGg9N30KYXZnIDwtIG1lYW4obG9nMXAobWV0YSRBbGlnbmVkX1JlYWRzKSkKc2QgPC0gc2QobG9nMXAobWV0YSRBbGlnbmVkX1JlYWRzKSkKaGlzdChsb2cxcChtZXRhJEFsaWduZWRfUmVhZHMpLGJyZWFrcyA9IDEwMCkKYWJsaW5lKHYgPSBhdmcsIGx0eSA9IDIsIGNvbCA9ICJibGFjayIpCmFibGluZSh2ID0gYXZnIC0gc2QsIGx0eSA9IDIsIGNvbCA9ICJncmVlbiIpCmFibGluZSh2ID0gYXZnIC0gMypzZCwgbHR5ID0gMiwgY29sID0gICJibHVlIikKYGBgCgpDcmVhdGUgU2V1cmF0IG9iamVjdCB3aXRoIApgYGB7cn0KbWV0YSA8LSBtZXRhW2NvbG5hbWVzKGRmKSxdCmtlZXAgPC0gbG9nMXAobWV0YSRBbGlnbmVkX1JlYWRzKSA+IGF2ZyAtIDMqc2QKZGYwLmZhbiA8LSBDcmVhdGVTZXVyYXRPYmplY3QoZGZbLGtlZXBdLG1ldGEuZGF0YSA9IG1ldGFba2VlcCxdLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcHJvamVjdCA9ICJGYW4iLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbm9ybWFsaXphdGlv
bi5tZXRob2QgPSAiTG9nTm9ybWFsaXplIikKZGYwLmZhbkBtZXRhLmRhdGEkblVNSSA8LSBkZjAuZmFuQG1ldGEuZGF0YSRBbGlnbmVkX1JlYWRzCmRmMC5mYW5AbWV0YS5kYXRhJFJlZ2lvbiA8LSBkZjAuZmFuQG1ldGEuZGF0YSRvcmlnLmlkZW50CmRmMC5mYW5AbWV0YS5kYXRhJG9yaWcuaWRlbnQgPC0gIkZhbiIKZGYwLmZhbkBtZXRhLmRhdGEgPC0gZGYwLmZhbkBtZXRhLmRhdGFbLC1jKDQ6OCldCmBgYAo=
comments powered by Disqus