mavis.bam.stats
os.environ[OMP_NUM_THREADS]
os.environ["OMP_NUM_THREADS"] = "4" # export OMP_NUM_THREADS=4
os.environ[OPENBLAS_NUM_THREADS]
os.environ["OPENBLAS_NUM_THREADS"] = "4" # export OPENBLAS_NUM_THREADS=4
os.environ[MKL_NUM_THREADS]
os.environ["MKL_NUM_THREADS"] = "4" # export MKL_NUM_THREADS=4
os.environ[VECLIB_MAXIMUM_THREADS]
os.environ["VECLIB_MAXIMUM_THREADS"] = "4" # export VECLIB_MAXIMUM_THREADS=4
os.environ[NUMEXPR_NUM_THREADS]
os.environ["NUMEXPR_NUM_THREADS"] = "6" # export NUMEXPR_NUM_THREADS=6
class mavis.bam.stats.BamStats
class mavis.bam.stats.Histogram
inherits dict
mavis.bam.stats.Histogram.add()
add a key to the histogram with a default frequency of 1
def add(self, item, freq=1):
Args
- item
- freq
mavis.bam.stats.Histogram.median()
flattens the histogram to compute the median value
def median(self):
mavis.bam.stats.compute_transcriptome_bam_stats()
computes various statistical measures relating to the input bam file
def compute_transcriptome_bam_stats(
bam_cache,
annotations,
sample_size,
min_mapping_quality=1,
stranded=True,
sample_cap=10000,
distribution_fraction=0.97,
):
Args
- bam_cache
- annotations (
object
): see :func:`mavis.annotate.load_reference_genes`
- sample_size (
int
): the number of genes to compute stats over
- min_mapping_quality (
int
): the minimum mapping quality for a read to be used
- stranded (
bool
): if True then reads must match the gene strand
- sample_cap (
int
): maximum number of reads to collect for any given sample region
- distribution_fraction (
float
): the proportion of the distribution to use in computing stdev
Returns
BamStats
: the fragment size median, stdev and the read length in an object
mavis.bam.stats.compute_genome_bam_stats()
computes various statistical measures relating to the input bam file
def compute_genome_bam_stats(
bam_file_handle,
sample_bin_size,
sample_size,
min_mapping_quality=1,
sample_cap=10000,
distribution_fraction=0.99,
):
Args
- bam_file_handle (
pysam.AlignmentFile
): the input bam file handle
- sample_bin_size (
int
): how large to make the sample bin (in bp)
- sample_size (
int
): the number of genes to compute stats over
- min_mapping_quality (
int
): the minimum mapping quality for a read to be used
- sample_cap (
int
): maximum number of reads to collect for any given sample region
- distribution_fraction (
float
): the proportion of the distribution to use in computing stdev
Returns
BamStats
: the fragment size median, stdev and the read length in an object