loadData()
loads :
the sample information related to an RNA-seq experiment ;
the coverage profile overlapping the locus specified by the user (computed from bam files) ;
the log2-FC per-base (computed from coverages).
Usage
loadData(
sampleInfo,
referenceCondition,
locus,
stranded = TRUE,
strandSpecific = 1,
fromBam = TRUE,
coverageType = "average",
readLength = 1,
isPairedEnd = FALSE,
nbThreads = 1,
verbose = TRUE,
featureCountsOtherParams = list()
)
Arguments
- sampleInfo
A
String
. Path to the file with sample information. The file includes the following columns:sample : identifier for the sample ;
condition : identifier for the condition ;
replicate : identifier for the replicate ;
bam : path to the bam file ;
coverage : path to the coverage file (in rds format).
- referenceCondition
A
String
. Reference condition of the RNA-Seq experiment. The reference condition specified by the user is set to the denominator of the log2-FC.- locus
A
List
. The list contains the coordinates for the target genomic region:seqid : a
String
. Chromosome identifier for the target genomic region ;chromStart : an
Integer
. Start position for the target genomic region ;chromEnd : an
Integer
. End position for the target genomic region.
- stranded
A
Boolean
. Indicate if the reads are stranded or not.- strandSpecific
Passed on to
Rsubread::featureCounts()
.- fromBam
A
Boolean
. Indicate if coverage profiles have to be computed from bam files or loaded from pre-computed rds files.- coverageType
A
String
. Select how to compute the coverage profiles:fivePrime : coverage profiles compute on 5' ends of reads ;
threePrime : coverage profiles compute on 3' ends of reads ;
average : coverageFactory coverage profile compute on average of 5' & 3' ends of reads ;
center : coverage profiles compute on center position of reads ;
fullLength : coverage profiles compute on full length reads.
- readLength
An
Integer
. The average length of reads from bam files.- isPairedEnd
Passed on to
Rsubread::featureCounts()
.- nbThreads
Passed on to
Rsubread::featureCounts()
.- verbose
A
Boolean
. Should all the operations performed be displayed ?- featureCountsOtherParams
A
List
. Other paramters passed on toRsubread::featureCounts()
.
Value
A List
. The list contains loaded data:
coverage profiles : a
List
of coverages asRle
;log2-FC : an
Rle
ofDouble
;stranded : see
loadData()
;locus : the target genomic region as
GRanges
;sampleInfo : see
loadData()
;referenceCondition : see
loadData()
;
Examples
# Create a working directory for running the example.
working_directory <- "./DIFFSEGR_TEST"
dir.create(working_directory, showWarnings = FALSE)
# Save sample information in a text file.
sample_info <- data.frame(
sample = c("pnp1_1_1", "pnp1_1_2", "wt_1", "wt_2"),
condition = rep(c("pnp1_1", "wt"), each = 2),
replicate = rep(1:2,2),
bam = sapply(
c("pnp1_1_1_ChrC_71950_78500.bam",
"pnp1_1_2_ChrC_71950_78500.bam",
"wt_1_ChrC_71950_78500.bam",
"wt_2_ChrC_71950_78500.bam"
),
function(bam) system.file("extdata", bam, package = "DiffSegR")
),
coverage = file.path(
working_directory,
paste0(c("pnp1_1_1", "pnp1_1_2", "wt_1", "wt_2"), ".rds")
)
)
print(sample_info)
#> sample condition replicate
#> pnp1_1_1_ChrC_71950_78500.bam pnp1_1_1 pnp1_1 1
#> pnp1_1_2_ChrC_71950_78500.bam pnp1_1_2 pnp1_1 2
#> wt_1_ChrC_71950_78500.bam wt_1 wt 1
#> wt_2_ChrC_71950_78500.bam wt_2 wt 2
#> bam
#> pnp1_1_1_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/pnp1_1_1_ChrC_71950_78500.bam
#> pnp1_1_2_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/pnp1_1_2_ChrC_71950_78500.bam
#> wt_1_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/wt_1_ChrC_71950_78500.bam
#> wt_2_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/wt_2_ChrC_71950_78500.bam
#> coverage
#> pnp1_1_1_ChrC_71950_78500.bam ./DIFFSEGR_TEST/pnp1_1_1.rds
#> pnp1_1_2_ChrC_71950_78500.bam ./DIFFSEGR_TEST/pnp1_1_2.rds
#> wt_1_ChrC_71950_78500.bam ./DIFFSEGR_TEST/wt_1.rds
#> wt_2_ChrC_71950_78500.bam ./DIFFSEGR_TEST/wt_2.rds
write.table(
sample_info,
file.path(working_directory, "sample_info.txt")
)
# Build coverages and log2-FC.
data <- loadData(
sampleInfo = file.path(working_directory,"sample_info.txt"),
locus = list(
seqid = "ChrC",
chromStart = 71950,
chromEnd = 78500
),
referenceCondition = "wt",
stranded = TRUE,
fromBam = TRUE,
nbThreads = 1
)
#> building coverage profiles ...
#>
#> ========== _____ _ _ ____ _____ ______ _____
#> ===== / ____| | | | _ \| __ \| ____| /\ | __ \
#> ===== | (___ | | | | |_) | |__) | |__ / \ | | | |
#> ==== \___ \| | | | _ <| _ /| __| / /\ \ | | | |
#> ==== ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
#> ========== |_____/ \____/|____/|_| \_\______/_/ \_\_____/
#> Rsubread 2.10.4
#>
#> //========================== featureCounts setting ===========================\\
#> || ||
#> || Input files : 4 BAM files ||
#> || ||
#> || pnp1_1_1_ChrC_71950_78500.bam ||
#> || pnp1_1_2_ChrC_71950_78500.bam ||
#> || wt_1_ChrC_71950_78500.bam ||
#> || wt_2_ChrC_71950_78500.bam ||
#> || ||
#> || Paired-end : no ||
#> || Count read pairs : no ||
#> || Annotation : R data.frame ||
#> || Dir for temp files : . ||
#> || Threads : 1 ||
#> || Level : meta-feature level ||
#> || Multimapping reads : counted ||
#> || Multi-overlapping reads : not counted ||
#> || Min overlapping bases : 1 ||
#> || Read reduction : to 3' end ||
#> || ||
#> \\============================================================================//
#>
#> //================================= Running ==================================\\
#> || ||
#> || Load annotation file .Rsubread_UserProvidedAnnotation_pid49270 ... ||
#> || Features : 13102 ||
#> || Meta-features : 13102 ||
#> || Chromosomes/contigs : 1 ||
#> || ||
#> || Process BAM file pnp1_1_1_ChrC_71950_78500.bam... ||
#> || Strand specific : stranded ||
#> || Single-end reads are included. ||
#> || Total alignments : 24260 ||
#> || Successfully assigned alignments : 24249 (100.0%) ||
#> || Running time : 0.00 minutes ||
#> || ||
#> || Process BAM file pnp1_1_2_ChrC_71950_78500.bam... ||
#> || Strand specific : stranded ||
#> || Single-end reads are included. ||
#> || Total alignments : 21100 ||
#> || Successfully assigned alignments : 21098 (100.0%) ||
#> || Running time : 0.00 minutes ||
#> || ||
#> || Process BAM file wt_1_ChrC_71950_78500.bam... ||
#> || Strand specific : stranded ||
#> || Single-end reads are included. ||
#> || Total alignments : 17533 ||
#> || Successfully assigned alignments : 17531 (100.0%) ||
#> || Running time : 0.00 minutes ||
#> || ||
#> || Process BAM file wt_2_ChrC_71950_78500.bam... ||
#> || Strand specific : stranded ||
#> || Single-end reads are included. ||
#> || Total alignments : 14070 ||
#> || Successfully assigned alignments : 14066 (100.0%) ||
#> || Running time : 0.00 minutes ||
#> || ||
#> || Write the final count table. ||
#> || Write the read assignment summary. ||
#> || ||
#> \\============================================================================//
#>
#>
#> ========== _____ _ _ ____ _____ ______ _____
#> ===== / ____| | | | _ \| __ \| ____| /\ | __ \
#> ===== | (___ | | | | |_) | |__) | |__ / \ | | | |
#> ==== \___ \| | | | _ <| _ /| __| / /\ \ | | | |
#> ==== ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
#> ========== |_____/ \____/|____/|_| \_\______/_/ \_\_____/
#> Rsubread 2.10.4
#>
#> //========================== featureCounts setting ===========================\\
#> || ||
#> || Input files : 4 BAM files ||
#> || ||
#> || pnp1_1_1_ChrC_71950_78500.bam ||
#> || pnp1_1_2_ChrC_71950_78500.bam ||
#> || wt_1_ChrC_71950_78500.bam ||
#> || wt_2_ChrC_71950_78500.bam ||
#> || ||
#> || Paired-end : no ||
#> || Count read pairs : no ||
#> || Annotation : R data.frame ||
#> || Dir for temp files : . ||
#> || Threads : 1 ||
#> || Level : meta-feature level ||
#> || Multimapping reads : counted ||
#> || Multi-overlapping reads : not counted ||
#> || Min overlapping bases : 1 ||
#> || Read reduction : to 5' end ||
#> || ||
#> \\============================================================================//
#>
#> //================================= Running ==================================\\
#> || ||
#> || Load annotation file .Rsubread_UserProvidedAnnotation_pid49270 ... ||
#> || Features : 13102 ||
#> || Meta-features : 13102 ||
#> || Chromosomes/contigs : 1 ||
#> || ||
#> || Process BAM file pnp1_1_1_ChrC_71950_78500.bam... ||
#> || Strand specific : stranded ||
#> || Single-end reads are included. ||
#> || Total alignments : 24260 ||
#> || Successfully assigned alignments : 24251 (100.0%) ||
#> || Running time : 0.00 minutes ||
#> || ||
#> || Process BAM file pnp1_1_2_ChrC_71950_78500.bam... ||
#> || Strand specific : stranded ||
#> || Single-end reads are included. ||
#> || Total alignments : 21100 ||
#> || Successfully assigned alignments : 21088 (99.9%) ||
#> || Running time : 0.00 minutes ||
#> || ||
#> || Process BAM file wt_1_ChrC_71950_78500.bam... ||
#> || Strand specific : stranded ||
#> || Single-end reads are included. ||
#> || Total alignments : 17533 ||
#> || Successfully assigned alignments : 17520 (99.9%) ||
#> || Running time : 0.00 minutes ||
#> || ||
#> || Process BAM file wt_2_ChrC_71950_78500.bam... ||
#> || Strand specific : stranded ||
#> || Single-end reads are included. ||
#> || Total alignments : 14070 ||
#> || Successfully assigned alignments : 14060 (99.9%) ||
#> || Running time : 0.00 minutes ||
#> || ||
#> || Write the final count table. ||
#> || Write the read assignment summary. ||
#> || ||
#> \\============================================================================//
#>
#> building log2-FC per-base ...
#> finished in 0.398390531539917 secs
print(data)
#> $coverages
#> $coverages$plus
#> $coverages$plus$pnp1_1_1
#> numeric-Rle of length 6551 with 5255 runs
#> Lengths: 86 1 3 1 ... 1 1 10
#> Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#>
#> $coverages$plus$pnp1_1_2
#> numeric-Rle of length 6551 with 5209 runs
#> Lengths: 89 1 2 1 ... 5 2 30
#> Values : 0.000000 0.732051 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#>
#> $coverages$plus$wt_1
#> numeric-Rle of length 6551 with 4610 runs
#> Lengths: 46 1 15 1 ... 23 1 14
#> Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#>
#> $coverages$plus$wt_2
#> numeric-Rle of length 6551 with 4601 runs
#> Lengths: 57 1 26 1 ... 22 1 2
#> Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#>
#>
#> $coverages$minus
#> $coverages$minus$pnp1_1_1
#> numeric-Rle of length 6551 with 526 runs
#> Lengths: 31 1 68 1 ... 6 1 1
#> Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#>
#> $coverages$minus$pnp1_1_2
#> numeric-Rle of length 6551 with 585 runs
#> Lengths: 406 1 42 1 ... 2 1 1
#> Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#>
#> $coverages$minus$wt_1
#> numeric-Rle of length 6551 with 463 runs
#> Lengths: 427 1 83 1 ... 4 1 2
#> Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#>
#> $coverages$minus$wt_2
#> numeric-Rle of length 6551 with 544 runs
#> Lengths: 458 1 63 1 ... 1 18 1
#> Values : 0.000000 0.414214 0.000000 0.414214 ... 0.414214 0.000000 0.732051
#>
#>
#>
#> $log2FoldChange
#> $log2FoldChange$plus
#> numeric-Rle of length 6551 with 6004 runs
#> Lengths: 46 1 10 ... 1 2
#> Values : 0.0000000 -0.2500000 0.0000000 ... -0.250000 0.000000
#>
#> $log2FoldChange$minus
#> numeric-Rle of length 6551 with 1239 runs
#> Lengths: 31 1 68 ... 1 1
#> Values : 0.00 0.25 0.00 ... 5.00000e-01 -3.96241e-01
#>
#>
#> $locus
#> GRanges object with 1 range and 0 metadata columns:
#> seqnames ranges strand
#> <Rle> <IRanges> <Rle>
#> [1] ChrC 71950-78500 *
#> -------
#> seqinfo: 1 sequence from an unspecified genome; no seqlengths
#>
#> $stranded
#> [1] TRUE
#>
#> $sampleInfo
#> sample condition replicate
#> pnp1_1_1_ChrC_71950_78500.bam pnp1_1_1 pnp1_1 1
#> pnp1_1_2_ChrC_71950_78500.bam pnp1_1_2 pnp1_1 2
#> wt_1_ChrC_71950_78500.bam wt_1 wt 1
#> wt_2_ChrC_71950_78500.bam wt_2 wt 2
#> bam
#> pnp1_1_1_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/pnp1_1_1_ChrC_71950_78500.bam
#> pnp1_1_2_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/pnp1_1_2_ChrC_71950_78500.bam
#> wt_1_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/wt_1_ChrC_71950_78500.bam
#> wt_2_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/wt_2_ChrC_71950_78500.bam
#> coverage
#> pnp1_1_1_ChrC_71950_78500.bam ./DIFFSEGR_TEST/pnp1_1_1.rds
#> pnp1_1_2_ChrC_71950_78500.bam ./DIFFSEGR_TEST/pnp1_1_2.rds
#> wt_1_ChrC_71950_78500.bam ./DIFFSEGR_TEST/wt_1.rds
#> wt_2_ChrC_71950_78500.bam ./DIFFSEGR_TEST/wt_2.rds
#>
#> $referenceCondition
#> [1] "wt"
#>
# delete working directory
unlink(working_directory, recursive = TRUE)