Skip to content

mavis.bam.stats

os.environ[OMP_NUM_THREADS]

os.environ["OMP_NUM_THREADS"] = "4"  # export OMP_NUM_THREADS=4

os.environ[OPENBLAS_NUM_THREADS]

os.environ["OPENBLAS_NUM_THREADS"] = "4"  # export OPENBLAS_NUM_THREADS=4

os.environ[MKL_NUM_THREADS]

os.environ["MKL_NUM_THREADS"] = "4"  # export MKL_NUM_THREADS=4

os.environ[VECLIB_MAXIMUM_THREADS]

os.environ["VECLIB_MAXIMUM_THREADS"] = "4"  # export VECLIB_MAXIMUM_THREADS=4

os.environ[NUMEXPR_NUM_THREADS]

os.environ["NUMEXPR_NUM_THREADS"] = "6"  # export NUMEXPR_NUM_THREADS=6

class mavis.bam.stats.BamStats

class mavis.bam.stats.Histogram

inherits dict

mavis.bam.stats.Histogram.add()

add a key to the histogram with a default frequency of 1

def add(self, item, freq=1):

Args

  • item
  • freq

mavis.bam.stats.Histogram.median()

flattens the histogram to compute the median value

def median(self):

mavis.bam.stats.compute_transcriptome_bam_stats()

computes various statistical measures relating to the input BAM file

def compute_transcriptome_bam_stats(
    bam_cache,
    annotations,
    sample_size,
    min_mapping_quality=1,
    stranded=True,
    sample_cap=10000,
    distribution_fraction=0.97,
):

Args

  • bam_cache
  • annotations (object): see mavis.annotate.load_reference_genes()
  • sample_size (int): the number of genes to compute stats over
  • min_mapping_quality (int): the minimum mapping quality for a read to be used
  • stranded (bool): if True then reads must match the gene strand
  • sample_cap (int): maximum number of reads to collect for any given sample region
  • distribution_fraction (float): the proportion of the distribution to use in computing stdev

Returns

  • BamStats: the fragment size median, stdev and the read length in an object

mavis.bam.stats.compute_genome_bam_stats()

computes various statistical measures relating to the input BAM file

def compute_genome_bam_stats(
    bam_file_handle,
    sample_bin_size,
    sample_size,
    min_mapping_quality=1,
    sample_cap=10000,
    distribution_fraction=0.99,
):

Args

  • bam_file_handle (pysam.AlignmentFile): the input bam file handle
  • sample_bin_size (int): how large to make the sample bin (in bp)
  • sample_size (int): the number of sample bins to compute stats over
  • min_mapping_quality (int): the minimum mapping quality for a read to be used
  • sample_cap (int): maximum number of reads to collect for any given sample region
  • distribution_fraction (float): the proportion of the distribution to use in computing stdev

Returns

  • BamStats: the fragment size median, stdev and the read length in an object