Skip to content

Commit

Permalink
read-depth profiles
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiasrausch committed Jul 20, 2020
1 parent 1ee020e commit 9dd7788
Show file tree
Hide file tree
Showing 9 changed files with 1,545 additions and 8 deletions.
43 changes: 43 additions & 0 deletions R/rd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
library(ggplot2)
library(scales)
library(gtable)
library(grid)

chrNamesLong = c("chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20", "chr21", "chr22", "chrX")
chrNamesShort = c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","X")

args = commandArgs(trailingOnly=TRUE)
x = read.table(args[1], header=T)
maxCN = 8

# Fix chromosome ordering
if (sum(x$chr %in% chrNamesLong) > sum(x$chr %in% chrNamesShort)) { chrs = chrNamesLong; } else { chrs = chrNamesShort; }
x = x[x$chr %in% chrs,]
x$chr = factor(x$chr, levels=chrs)

# Whole genome
p = ggplot(data=x, aes(x=start, y=x[,6]))
p = p + geom_point(pch=21, size=0.5)
p = p + xlab("Chromosome")
p = p + ylab("Copy-number")
p = p + scale_x_continuous(labels=comma)
p = p + facet_grid(. ~ chr, scales="free_x", space="free_x")
p = p + ylim(0, maxCN)
p = p + theme(axis.text.x = element_text(angle=45, hjust=1))
ggsave(p, file="plot.wholegenome.png", width=24, height=6)
print(warnings())

# By chromosome
for(chrname in unique(x$chr)) {
print(chrname)
sub = x[x$chr == chrname,]
p = ggplot(data=sub, aes(x=start, y=sub[,6]))
p = p + geom_point(pch=21, size=0.5)
p = p + ylab("Copy-number") + xlab(chrname)
p = p + scale_x_continuous(labels=comma, breaks = scales::pretty_breaks(n=20))
p = p + ylim(0, maxCN)
p = p + theme(axis.text.x = element_text(angle=45, hjust=1))
ggsave(p, file=paste0("plot.", chrname, ".png"), width=24, height=6)
print(warnings())
}

18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,24 @@ Germline SV calling

`delly filter -f germline -o germline.bcf merged.bcf`


Read-depth profiles
-------------------

You can generate read-depth profiles with delly. This requires a mappability map which can be downloaded here:

[Mappability Maps](https://gear.embl.de/data/delly/)

The command to count reads in 10kbp windows and normalize the coverage is:

`delly rd -a -g hg19.fa -m hg19.map input.bam`

The output file can be plotted using R to generate normalized copy-number profiles:

`Rscript R/rd.R out.cov.gz`



FAQ
---
* What is the smallest SV size Delly can call?
Expand Down
119 changes: 119 additions & 0 deletions src/bed.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#ifndef BED_H
#define BED_H

#include <boost/filesystem.hpp>
#include <boost/multi_array.hpp>
#include <boost/progress.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/unordered_map.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/tokenizer.hpp>
#include <boost/math/distributions/chi_squared.hpp>
#include <boost/math/distributions/hypergeometric.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/zlib.hpp>

#include <htslib/faidx.h>
#include <htslib/vcf.h>
#include <htslib/sam.h>

namespace torali
{

// Flattens overlapping intervals
template<typename TRegionsGenome>
inline int32_t
_parseBedIntervals(std::string const& filename, bool const filePresent, bam_hdr_t* hdr, TRegionsGenome& bedRegions) {
typedef typename TRegionsGenome::value_type TChrIntervals;
typedef typename TChrIntervals::interval_type TIVal;

int32_t intervals = 0;
if (filePresent) {
bedRegions.resize(hdr->n_targets, TChrIntervals());
std::ifstream chrFile(filename.c_str(), std::ifstream::in);
if (chrFile.is_open()) {
while (chrFile.good()) {
std::string chrFromFile;
getline(chrFile, chrFromFile);
typedef boost::tokenizer< boost::char_separator<char> > Tokenizer;
boost::char_separator<char> sep(" \t,;");
Tokenizer tokens(chrFromFile, sep);
Tokenizer::iterator tokIter = tokens.begin();
if (tokIter!=tokens.end()) {
std::string chrName = *tokIter++;
int32_t tid = bam_name2id(hdr, chrName.c_str());
if (tid >= 0) {
if (tokIter!=tokens.end()) {
int32_t start = boost::lexical_cast<int32_t>(*tokIter++);
int32_t end = boost::lexical_cast<int32_t>(*tokIter++);
bedRegions[tid].insert(TIVal::right_open(start, end));
++intervals;
}
}
}
}
}
chrFile.close();
}
return intervals;
}


// Keeps overlapping intervals
template<typename TRegionsGenome>
inline int32_t
_parsePotOverlappingIntervals(std::string const& filename, bool const filePresent, bam_hdr_t* hdr, TRegionsGenome& bedRegions) {
typedef typename TRegionsGenome::value_type TChrIntervals;

int32_t intervals = 0;
if (filePresent) {
bedRegions.resize(hdr->n_targets, TChrIntervals());
std::ifstream chrFile(filename.c_str(), std::ifstream::in);
if (chrFile.is_open()) {
while (chrFile.good()) {
std::string chrFromFile;
getline(chrFile, chrFromFile);
typedef boost::tokenizer< boost::char_separator<char> > Tokenizer;
boost::char_separator<char> sep(" \t,;");
Tokenizer tokens(chrFromFile, sep);
Tokenizer::iterator tokIter = tokens.begin();
if (tokIter!=tokens.end()) {
std::string chrName = *tokIter++;
int32_t tid = bam_name2id(hdr, chrName.c_str());
if (tid >= 0) {
if (tokIter!=tokens.end()) {
int32_t start = boost::lexical_cast<int32_t>(*tokIter++);
int32_t end = boost::lexical_cast<int32_t>(*tokIter++);
bedRegions[tid].insert(std::make_pair(start, end));
++intervals;
}
}
}
}
}
chrFile.close();
}
return intervals;
}


template<typename TChrIntervals>
inline void
_mergeOverlappingBedEntries(TChrIntervals const& bedRegions, TChrIntervals& citv) {
typedef boost::icl::interval_set<uint32_t> TUniqueIntervals;
typedef typename TUniqueIntervals::interval_type TIVal;
TUniqueIntervals uitv;

// Insert intervals
for(typename TChrIntervals::const_iterator it = bedRegions.begin(); it != bedRegions.end(); ++it) uitv.insert(TIVal::right_open(it->first, it->second));

// Fetch unique intervals
for(typename TUniqueIntervals::iterator it = uitv.begin(); it != uitv.end(); ++it) citv.insert(std::make_pair(it->lower(), it->upper()));
}
}

#endif
Loading

0 comments on commit 9dd7788

Please sign in to comment.