Skip to contents

loadData() loads :

  • the sample information related to an RNA-seq experiment ;

  • the coverage profile overlapping the locus specified by the user (computed from bam files) ;

  • the log2-FC per-base (computed from coverages).

Usage

loadData(
  sampleInfo,
  referenceCondition,
  locus,
  stranded = TRUE,
  strandSpecific = 1,
  fromBam = TRUE,
  coverageType = "average",
  readLength = 1,
  isPairedEnd = FALSE,
  nbThreads = 1,
  verbose = TRUE,
  featureCountsOtherParams = list()
)

Arguments

sampleInfo

A String. Path to the file with sample information. The file includes the following columns:

  • sample : identifier for the sample ;

  • condition : identifier for the condition ;

  • replicate : identifier for the replicate ;

  • bam : path to the bam file ;

  • coverage : path to the coverage file (in rds format).

referenceCondition

A String. Reference condition of the RNA-Seq experiment. The reference condition specified by the user is set to the denominator of the log2-FC.

locus

A List. The list contains the coordinates for the target genomic region:

  • seqid : a String. Chromosome identifier for the target genomic region ;

  • chromStart : an Integer. Start position for the target genomic region ;

  • chromEnd : an Integer. End position for the target genomic region.

stranded

A Boolean. Indicate if the reads are stranded or not.

strandSpecific

Passed on to Rsubread::featureCounts().

fromBam

A Boolean. Indicate if coverage profiles have to be computed from bam files or loaded from pre-computed rds files.

coverageType

A String. Select how to compute the coverage profiles:

  • fivePrime : coverage profiles compute on 5' ends of reads ;

  • threePrime : coverage profiles compute on 3' ends of reads ;

  • average : coverageFactory coverage profile compute on average of 5' & 3' ends of reads ;

  • center : coverage profiles compute on center position of reads ;

  • fullLength : coverage profiles compute on full length reads.

readLength

An Integer. The average length of reads from bam files.

isPairedEnd

Passed on to Rsubread::featureCounts().

nbThreads

Passed on to Rsubread::featureCounts().

verbose

A Boolean. Should all the operations performed be displayed ?

featureCountsOtherParams

A List. Other paramters passed on to Rsubread::featureCounts().

Value

A List. The list contains loaded data:

  • coverage profiles : a List of coverages as Rle;

  • log2-FC : an Rle of Double ;

  • stranded : see loadData() ;

  • locus : the target genomic region as GRanges;

  • sampleInfo : see loadData() ;

  • referenceCondition : see loadData() ;

Examples

# Create a working directory for running the example.
working_directory <- "./DIFFSEGR_TEST"
dir.create(working_directory, showWarnings = FALSE)

# Save sample information in a text file.
sample_info <- data.frame(
  sample    = c("pnp1_1_1", "pnp1_1_2", "wt_1", "wt_2"),
  condition = rep(c("pnp1_1", "wt"), each = 2),
  replicate = rep(1:2,2),
  bam       = sapply(
    c("pnp1_1_1_ChrC_71950_78500.bam", 
      "pnp1_1_2_ChrC_71950_78500.bam",
      "wt_1_ChrC_71950_78500.bam",
      "wt_2_ChrC_71950_78500.bam"
     ),
     function(bam) system.file("extdata", bam, package = "DiffSegR")
  ),
  coverage  = file.path(
    working_directory,
    paste0(c("pnp1_1_1", "pnp1_1_2", "wt_1", "wt_2"), ".rds")
  )
)
print(sample_info)
#>                                 sample condition replicate
#> pnp1_1_1_ChrC_71950_78500.bam pnp1_1_1    pnp1_1         1
#> pnp1_1_2_ChrC_71950_78500.bam pnp1_1_2    pnp1_1         2
#> wt_1_ChrC_71950_78500.bam         wt_1        wt         1
#> wt_2_ChrC_71950_78500.bam         wt_2        wt         2
#>                                                                                                                   bam
#> pnp1_1_1_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/pnp1_1_1_ChrC_71950_78500.bam
#> pnp1_1_2_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/pnp1_1_2_ChrC_71950_78500.bam
#> wt_1_ChrC_71950_78500.bam         /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/wt_1_ChrC_71950_78500.bam
#> wt_2_ChrC_71950_78500.bam         /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/wt_2_ChrC_71950_78500.bam
#>                                                   coverage
#> pnp1_1_1_ChrC_71950_78500.bam ./DIFFSEGR_TEST/pnp1_1_1.rds
#> pnp1_1_2_ChrC_71950_78500.bam ./DIFFSEGR_TEST/pnp1_1_2.rds
#> wt_1_ChrC_71950_78500.bam         ./DIFFSEGR_TEST/wt_1.rds
#> wt_2_ChrC_71950_78500.bam         ./DIFFSEGR_TEST/wt_2.rds
write.table(
  sample_info, 
  file.path(working_directory, "sample_info.txt")
)

# Build coverages and log2-FC.
data <- loadData(
  sampleInfo         = file.path(working_directory,"sample_info.txt"),
  locus        = list(
    seqid      = "ChrC", 
    chromStart = 71950, 
    chromEnd   = 78500
  ),
  referenceCondition = "wt",
  stranded           = TRUE,
  fromBam            = TRUE,
  nbThreads          = 1
)
#> building coverage profiles ...
#> 
#>         ==========     _____ _    _ ____  _____  ______          _____  
#>         =====         / ____| |  | |  _ \|  __ \|  ____|   /\   |  __ \ 
#>           =====      | (___ | |  | | |_) | |__) | |__     /  \  | |  | |
#>             ====      \___ \| |  | |  _ <|  _  /|  __|   / /\ \ | |  | |
#>               ====    ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
#>         ==========   |_____/ \____/|____/|_|  \_\______/_/    \_\_____/
#>        Rsubread 2.10.4
#> 
#> //========================== featureCounts setting ===========================\\
#> ||                                                                            ||
#> ||             Input files : 4 BAM files                                      ||
#> ||                                                                            ||
#> ||                           pnp1_1_1_ChrC_71950_78500.bam                    ||
#> ||                           pnp1_1_2_ChrC_71950_78500.bam                    ||
#> ||                           wt_1_ChrC_71950_78500.bam                        ||
#> ||                           wt_2_ChrC_71950_78500.bam                        ||
#> ||                                                                            ||
#> ||              Paired-end : no                                               ||
#> ||        Count read pairs : no                                               ||
#> ||              Annotation : R data.frame                                     ||
#> ||      Dir for temp files : .                                                ||
#> ||                 Threads : 1                                                ||
#> ||                   Level : meta-feature level                               ||
#> ||      Multimapping reads : counted                                          ||
#> || Multi-overlapping reads : not counted                                      ||
#> ||   Min overlapping bases : 1                                                ||
#> ||          Read reduction : to 3' end                                        ||
#> ||                                                                            ||
#> \\============================================================================//
#> 
#> //================================= Running ==================================\\
#> ||                                                                            ||
#> || Load annotation file .Rsubread_UserProvidedAnnotation_pid49270 ...         ||
#> ||    Features : 13102                                                        ||
#> ||    Meta-features : 13102                                                   ||
#> ||    Chromosomes/contigs : 1                                                 ||
#> ||                                                                            ||
#> || Process BAM file pnp1_1_1_ChrC_71950_78500.bam...                          ||
#> ||    Strand specific : stranded                                              ||
#> ||    Single-end reads are included.                                          ||
#> ||    Total alignments : 24260                                                ||
#> ||    Successfully assigned alignments : 24249 (100.0%)                       ||
#> ||    Running time : 0.00 minutes                                             ||
#> ||                                                                            ||
#> || Process BAM file pnp1_1_2_ChrC_71950_78500.bam...                          ||
#> ||    Strand specific : stranded                                              ||
#> ||    Single-end reads are included.                                          ||
#> ||    Total alignments : 21100                                                ||
#> ||    Successfully assigned alignments : 21098 (100.0%)                       ||
#> ||    Running time : 0.00 minutes                                             ||
#> ||                                                                            ||
#> || Process BAM file wt_1_ChrC_71950_78500.bam...                              ||
#> ||    Strand specific : stranded                                              ||
#> ||    Single-end reads are included.                                          ||
#> ||    Total alignments : 17533                                                ||
#> ||    Successfully assigned alignments : 17531 (100.0%)                       ||
#> ||    Running time : 0.00 minutes                                             ||
#> ||                                                                            ||
#> || Process BAM file wt_2_ChrC_71950_78500.bam...                              ||
#> ||    Strand specific : stranded                                              ||
#> ||    Single-end reads are included.                                          ||
#> ||    Total alignments : 14070                                                ||
#> ||    Successfully assigned alignments : 14066 (100.0%)                       ||
#> ||    Running time : 0.00 minutes                                             ||
#> ||                                                                            ||
#> || Write the final count table.                                               ||
#> || Write the read assignment summary.                                         ||
#> ||                                                                            ||
#> \\============================================================================//
#> 
#> 
#>         ==========     _____ _    _ ____  _____  ______          _____  
#>         =====         / ____| |  | |  _ \|  __ \|  ____|   /\   |  __ \ 
#>           =====      | (___ | |  | | |_) | |__) | |__     /  \  | |  | |
#>             ====      \___ \| |  | |  _ <|  _  /|  __|   / /\ \ | |  | |
#>               ====    ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
#>         ==========   |_____/ \____/|____/|_|  \_\______/_/    \_\_____/
#>        Rsubread 2.10.4
#> 
#> //========================== featureCounts setting ===========================\\
#> ||                                                                            ||
#> ||             Input files : 4 BAM files                                      ||
#> ||                                                                            ||
#> ||                           pnp1_1_1_ChrC_71950_78500.bam                    ||
#> ||                           pnp1_1_2_ChrC_71950_78500.bam                    ||
#> ||                           wt_1_ChrC_71950_78500.bam                        ||
#> ||                           wt_2_ChrC_71950_78500.bam                        ||
#> ||                                                                            ||
#> ||              Paired-end : no                                               ||
#> ||        Count read pairs : no                                               ||
#> ||              Annotation : R data.frame                                     ||
#> ||      Dir for temp files : .                                                ||
#> ||                 Threads : 1                                                ||
#> ||                   Level : meta-feature level                               ||
#> ||      Multimapping reads : counted                                          ||
#> || Multi-overlapping reads : not counted                                      ||
#> ||   Min overlapping bases : 1                                                ||
#> ||          Read reduction : to 5' end                                        ||
#> ||                                                                            ||
#> \\============================================================================//
#> 
#> //================================= Running ==================================\\
#> ||                                                                            ||
#> || Load annotation file .Rsubread_UserProvidedAnnotation_pid49270 ...         ||
#> ||    Features : 13102                                                        ||
#> ||    Meta-features : 13102                                                   ||
#> ||    Chromosomes/contigs : 1                                                 ||
#> ||                                                                            ||
#> || Process BAM file pnp1_1_1_ChrC_71950_78500.bam...                          ||
#> ||    Strand specific : stranded                                              ||
#> ||    Single-end reads are included.                                          ||
#> ||    Total alignments : 24260                                                ||
#> ||    Successfully assigned alignments : 24251 (100.0%)                       ||
#> ||    Running time : 0.00 minutes                                             ||
#> ||                                                                            ||
#> || Process BAM file pnp1_1_2_ChrC_71950_78500.bam...                          ||
#> ||    Strand specific : stranded                                              ||
#> ||    Single-end reads are included.                                          ||
#> ||    Total alignments : 21100                                                ||
#> ||    Successfully assigned alignments : 21088 (99.9%)                        ||
#> ||    Running time : 0.00 minutes                                             ||
#> ||                                                                            ||
#> || Process BAM file wt_1_ChrC_71950_78500.bam...                              ||
#> ||    Strand specific : stranded                                              ||
#> ||    Single-end reads are included.                                          ||
#> ||    Total alignments : 17533                                                ||
#> ||    Successfully assigned alignments : 17520 (99.9%)                        ||
#> ||    Running time : 0.00 minutes                                             ||
#> ||                                                                            ||
#> || Process BAM file wt_2_ChrC_71950_78500.bam...                              ||
#> ||    Strand specific : stranded                                              ||
#> ||    Single-end reads are included.                                          ||
#> ||    Total alignments : 14070                                                ||
#> ||    Successfully assigned alignments : 14060 (99.9%)                        ||
#> ||    Running time : 0.00 minutes                                             ||
#> ||                                                                            ||
#> || Write the final count table.                                               ||
#> || Write the read assignment summary.                                         ||
#> ||                                                                            ||
#> \\============================================================================//
#> 
#> building log2-FC per-base ...
#> finished in 0.398390531539917 secs
print(data)
#> $coverages
#> $coverages$plus
#> $coverages$plus$pnp1_1_1
#> numeric-Rle of length 6551 with 5255 runs
#>   Lengths:       86        1        3        1 ...        1        1       10
#>   Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#> 
#> $coverages$plus$pnp1_1_2
#> numeric-Rle of length 6551 with 5209 runs
#>   Lengths:       89        1        2        1 ...        5        2       30
#>   Values : 0.000000 0.732051 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#> 
#> $coverages$plus$wt_1
#> numeric-Rle of length 6551 with 4610 runs
#>   Lengths:       46        1       15        1 ...       23        1       14
#>   Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#> 
#> $coverages$plus$wt_2
#> numeric-Rle of length 6551 with 4601 runs
#>   Lengths:       57        1       26        1 ...       22        1        2
#>   Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#> 
#> 
#> $coverages$minus
#> $coverages$minus$pnp1_1_1
#> numeric-Rle of length 6551 with 526 runs
#>   Lengths:       31        1       68        1 ...        6        1        1
#>   Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#> 
#> $coverages$minus$pnp1_1_2
#> numeric-Rle of length 6551 with 585 runs
#>   Lengths:      406        1       42        1 ...        2        1        1
#>   Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#> 
#> $coverages$minus$wt_1
#> numeric-Rle of length 6551 with 463 runs
#>   Lengths:      427        1       83        1 ...        4        1        2
#>   Values : 0.000000 0.414214 0.000000 0.414214 ... 0.000000 0.414214 0.000000
#> 
#> $coverages$minus$wt_2
#> numeric-Rle of length 6551 with 544 runs
#>   Lengths:      458        1       63        1 ...        1       18        1
#>   Values : 0.000000 0.414214 0.000000 0.414214 ... 0.414214 0.000000 0.732051
#> 
#> 
#> 
#> $log2FoldChange
#> $log2FoldChange$plus
#> numeric-Rle of length 6551 with 6004 runs
#>   Lengths:         46          1         10 ...          1          2
#>   Values :  0.0000000 -0.2500000  0.0000000 ...  -0.250000   0.000000
#> 
#> $log2FoldChange$minus
#> numeric-Rle of length 6551 with 1239 runs
#>   Lengths:           31            1           68 ...            1            1
#>   Values :         0.00         0.25         0.00 ...  5.00000e-01 -3.96241e-01
#> 
#> 
#> $locus
#> GRanges object with 1 range and 0 metadata columns:
#>       seqnames      ranges strand
#>          <Rle>   <IRanges>  <Rle>
#>   [1]     ChrC 71950-78500      *
#>   -------
#>   seqinfo: 1 sequence from an unspecified genome; no seqlengths
#> 
#> $stranded
#> [1] TRUE
#> 
#> $sampleInfo
#>                                 sample condition replicate
#> pnp1_1_1_ChrC_71950_78500.bam pnp1_1_1    pnp1_1         1
#> pnp1_1_2_ChrC_71950_78500.bam pnp1_1_2    pnp1_1         2
#> wt_1_ChrC_71950_78500.bam         wt_1        wt         1
#> wt_2_ChrC_71950_78500.bam         wt_2        wt         2
#>                                                                                                                   bam
#> pnp1_1_1_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/pnp1_1_1_ChrC_71950_78500.bam
#> pnp1_1_2_ChrC_71950_78500.bam /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/pnp1_1_2_ChrC_71950_78500.bam
#> wt_1_ChrC_71950_78500.bam         /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/wt_1_ChrC_71950_78500.bam
#> wt_2_ChrC_71950_78500.bam         /tmp/RtmpXaWfVn/temp_libpathc03c318e5828/DiffSegR/extdata/wt_2_ChrC_71950_78500.bam
#>                                                   coverage
#> pnp1_1_1_ChrC_71950_78500.bam ./DIFFSEGR_TEST/pnp1_1_1.rds
#> pnp1_1_2_ChrC_71950_78500.bam ./DIFFSEGR_TEST/pnp1_1_2.rds
#> wt_1_ChrC_71950_78500.bam         ./DIFFSEGR_TEST/wt_1.rds
#> wt_2_ChrC_71950_78500.bam         ./DIFFSEGR_TEST/wt_2.rds
#> 
#> $referenceCondition
#> [1] "wt"
#> 

# delete working directory 
unlink(working_directory, recursive = TRUE)