Skip to content

mavis.constants

Module responsible for small utility functions and constants used throughout the mavis package

PROGNAME

PROGNAME = 'mavis'

EXIT_OK

EXIT_OK = 0

EXIT_ERROR

EXIT_ERROR = 1

EXIT_INCOMPLETE

EXIT_INCOMPLETE = 2

COMPLETE_STAMP

COMPLETE_STAMP = 'MAVIS.COMPLETE'

SUBCOMMAND

SUBCOMMAND = MavisNamespace(
    ANNOTATE='annotate',
    VALIDATE='validate',
    SETUP='setup',
    SCHEDULE='schedule',
    CLUSTER='cluster',
    PAIR='pairing',
    SUMMARY='summary',
    CONFIG='config',
    CONVERT='convert',
    OVERLAY='overlay',
)

CODON_SIZE

CODON_SIZE = 3

GAP

GAP = '-'

ORIENT

ORIENT = MavisNamespace(LEFT='L', RIGHT='R', NS='?')

PROTOCOL

PROTOCOL = MavisNamespace(GENOME='genome', TRANS='transcriptome')

DISEASE_STATUS

DISEASE_STATUS = MavisNamespace(DISEASED='diseased', NORMAL='normal')

STRAND

STRAND = MavisNamespace(POS='+', NEG='-', NS='?')

SVTYPE

SVTYPE = MavisNamespace(
    DEL='deletion',
    TRANS='translocation',
    ITRANS='inverted translocation',
    INV='inversion',
    INS='insertion',
    DUP='duplication',
)

CIGAR

CIGAR = MavisNamespace(M=0, I=1, D=2, N=3, S=4, H=5, P=6, X=8, EQ=7)  # noqa
"""MavisNamespace: Enum-like. For readable cigar values"""

NA_MAPPING_QUALITY

NA_MAPPING_QUALITY = 255

PYSAM_READ_FLAGS

PYSAM_READ_FLAGS = MavisNamespace(
    REVERSE=16,
    MATE_REVERSE=32,
    UNMAPPED=4,
    MATE_UNMAPPED=8,
    FIRST_IN_PAIR=64,
    LAST_IN_PAIR=128,
    SECONDARY=256,
    MULTIMAP=1,
    SUPPLEMENTARY=2048,
    TARGETED_ALIGNMENT='ta',
    RECOMPUTED_CIGAR='rc',
    BLAT_RANK='br',
    BLAT_SCORE='bs',
    BLAT_ALIGNMENTS='ba',
    BLAT_PERCENT_IDENTITY='bi',
    BLAT_PMS='bp',
)

DNA_ALPHABET

DNA_ALPHABET = alphabet = Gapped(ambiguous_dna, '-')

alphabet

DNA_ALPHABET = alphabet = Gapped(ambiguous_dna, '-')

DNA_ALPHABET.match

DNA_ALPHABET.match = lambda x, y: _match_ambiguous_dna(x, y)

FLAGS

FLAGS = MavisNamespace(LQ='LOWQUAL')

READ_PAIR_TYPE

READ_PAIR_TYPE = MavisNamespace(RR='RR', LL='LL', RL='RL', LR='LR')

CALL_METHOD

CALL_METHOD = MavisNamespace(
    CONTIG='contig',
    SPLIT='split reads',
    FLANK='flanking reads',
    SPAN='spanning reads',
    INPUT='input',
)

GENE_PRODUCT_TYPE

GENE_PRODUCT_TYPE = MavisNamespace(SENSE='sense', ANTI_SENSE='anti-sense')

PRIME

PRIME = MavisNamespace(FIVE=5, THREE=3)

START_AA

START_AA = 'M'

STOP_AA

STOP_AA = '*'

GIEMSA_STAIN

GIEMSA_STAIN = MavisNamespace(
    GNEG='gneg',
    GPOS33='gpos33',
    GPOS50='gpos50',
    GPOS66='gpos66',
    GPOS75='gpos75',
    GPOS25='gpos25',
    GPOS100='gpos100',
    ACEN='acen',
    GVAR='gvar',
    STALK='stalk',
)

COLUMNS

COLUMNS = MavisNamespace(
    tracking_id='tracking_id',
    library='library',
    cluster_id='cluster_id',
    cluster_size='cluster_size',
    validation_id='validation_id',
    annotation_id='annotation_id',
    product_id='product_id',
    event_type='event_type',
    pairing='pairing',
    inferred_pairing='inferred_pairing',
    gene1='gene1',
    gene1_direction='gene1_direction',
    gene2='gene2',
    gene2_direction='gene2_direction',
    gene1_aliases='gene1_aliases',
    gene2_aliases='gene2_aliases',
    gene_product_type='gene_product_type',
    transcript1='transcript1',
    transcript2='transcript2',
    fusion_splicing_pattern='fusion_splicing_pattern',
    fusion_cdna_coding_start='fusion_cdna_coding_start',
    fusion_cdna_coding_end='fusion_cdna_coding_end',
    fusion_mapped_domains='fusion_mapped_domains',
    fusion_sequence_fasta_id='fusion_sequence_fasta_id',
    fusion_sequence_fasta_file='fusion_sequence_fasta_file',
    fusion_protein_hgvs='fusion_protein_hgvs',
    annotation_figure='annotation_figure',
    annotation_figure_legend='annotation_figure_legend',
    genes_encompassed='genes_encompassed',
    genes_overlapping_break1='genes_overlapping_break1',
    genes_overlapping_break2='genes_overlapping_break2',
    genes_proximal_to_break1='genes_proximal_to_break1',
    genes_proximal_to_break2='genes_proximal_to_break2',
    break1_chromosome='break1_chromosome',
    break1_position_start='break1_position_start',
    break1_position_end='break1_position_end',
    break1_orientation='break1_orientation',
    exon_last_5prime='exon_last_5prime',
    exon_first_3prime='exon_first_3prime',
    break1_strand='break1_strand',
    break1_seq='break1_seq',
    break2_chromosome='break2_chromosome',
    break2_position_start='break2_position_start',
    break2_position_end='break2_position_end',
    break2_orientation='break2_orientation',
    break2_strand='break2_strand',
    break2_seq='break2_seq',
    opposing_strands='opposing_strands',
    stranded='stranded',
    protocol='protocol',
    disease_status='disease_status',
    tools='tools',
    call_method='call_method',
    break1_ewindow='break1_ewindow',
    break1_ewindow_count='break1_ewindow_count',
    break1_ewindow_practical_coverage='break1_ewindow_practical_coverage',
    break1_homologous_seq='break1_homologous_seq',
    break1_split_read_names='break1_split_read_names',
    break1_split_reads='break1_split_reads',
    break1_split_reads_forced='break1_split_reads_forced',
    break2_ewindow='break2_ewindow',
    break2_ewindow_count='break2_ewindow_count',
    break2_ewindow_practical_coverage='break2_ewindow_practical_coverage',
    break2_homologous_seq='break2_homologous_seq',
    break2_split_read_names='break2_split_read_names',
    break2_split_reads='break2_split_reads',
    break2_split_reads_forced='break2_split_reads_forced',
    contig_alignment_query_consumption='contig_alignment_query_consumption',
    contig_alignment_score='contig_alignment_score',
    contig_alignment_query_name='contig_alignment_query_name',
    contig_read_depth='contig_read_depth',
    contig_break1_read_depth='contig_break1_read_depth',
    contig_break2_read_depth='contig_break2_read_depth',
    contig_alignment_rank='contig_alignment_rank',
    contig_build_score='contig_build_score',
    contig_remap_score='contig_remap_score',
    contig_remap_coverage='contig_remap_coverage',
    contig_remapped_read_names='contig_remapped_read_names',
    contig_remapped_reads='contig_remapped_reads',
    contig_seq='contig_seq',
    contig_strand_specific='contig_strand_specific',
    contigs_assembled='contigs_assembled',
    call_sequence_complexity='call_sequence_complexity',
    spanning_reads='spanning_reads',
    spanning_read_names='spanning_read_names',
    flanking_median_fragment_size='flanking_median_fragment_size',
    flanking_pairs='flanking_pairs',
    flanking_pairs_compatible='flanking_pairs_compatible',
    flanking_pairs_read_names='flanking_pairs_read_names',
    flanking_pairs_compatible_read_names='flanking_pairs_compatible_read_names',
    flanking_stdev_fragment_size='flanking_stdev_fragment_size',
    linking_split_read_names='linking_split_read_names',
    linking_split_reads='linking_split_reads',
    raw_break1_half_mapped_reads='raw_break1_half_mapped_reads',
    raw_break1_split_reads='raw_break1_split_reads',
    raw_break2_half_mapped_reads='raw_break2_half_mapped_reads',
    raw_break2_split_reads='raw_break2_split_reads',
    raw_flanking_pairs='raw_flanking_pairs',
    raw_spanning_reads='raw_spanning_reads',
    untemplated_seq='untemplated_seq',
    filter_comment='filter_comment',
    cdna_synon='cdna_synon',
    protein_synon='protein_synon',
    supplementary_call='supplementary_call',
    net_size='net_size',
    repeat_count='repeat_count',
    assumed_untemplated='assumed_untemplated',
)

class mavis.constants.MavisNamespace

Namespace to hold module constants

mavis.constants.MavisNamespace.__init__()

def __init__(self, *pos, **kwargs):

mavis.constants.MavisNamespace.discard()

Remove a variable if it exists

def discard(self, attr):

Args

  • attr

mavis.constants.MavisNamespace.get_env_name()

Get the name of the corresponding environment variable

def get_env_name(self, attr):

Args

  • attr

Examples

>>> nspace = MavisNamespace(a=1)
>>> nspace.get_env_name('a')
'MAVIS_A'

mavis.constants.MavisNamespace.get_env_var()

retrieve the environment variable definition of a given attribute

def get_env_var(self, attr):

Args

  • attr

mavis.constants.MavisNamespace.parse_listable_string()

Given some string, parse it into a list

@classmethod
def parse_listable_string(cls, string, cast_type=str, nullable=False):

Args

  • string
  • cast_type
  • nullable

Examples

>>> MavisNamespace.parse_listable_string('1,2,3', int)
[1, 2, 3]
>>> MavisNamespace.parse_listable_string('1;2,None', int, True)
[1, 2, None]

mavis.constants.MavisNamespace.copy_from()

Copy variables from one namespace onto the current namespace

def copy_from(self, source, attrs=None):

Args

  • source
  • attrs

mavis.constants.MavisNamespace.get()

get an attribute, return a default (if given) if the attribute does not exist

def get(self, key, *pos):

Args

  • key

Examples

>>> nspace = MavisNamespace(thing=1, otherthing=2)
>>> nspace.get('thing', 2)
1
>>> nspace.get('nonexistent_thing', 2)
2
>>> nspace.get('nonexistent_thing')
Traceback (most recent call last):
....

mavis.constants.MavisNamespace.keys()

get the attribute keys as a list

def keys(self):

Examples

>>> MavisNamespace(thing=1, otherthing=2).keys()
['thing', 'otherthing']

mavis.constants.MavisNamespace.values()

get the attribute values as a list

def values(self):

Examples

>>> MavisNamespace(thing=1, otherthing=2).values()
[1, 2]

mavis.constants.MavisNamespace.enforce()

checks that the current namespace has a given value

def enforce(self, value):

Args

  • value

Returns

  • the input value

Raises

  • KeyError: the value did not exist

Examples

>>> nspace = MavisNamespace(thing=1, otherthing=2)
>>> nspace.enforce(1)
1
>>> nspace.enforce(3)
Traceback (most recent call last):
....

mavis.constants.MavisNamespace.reverse()

for a given value, return the associated key

def reverse(self, value):

Args

  • value: the value to get the key/attribute name for

Raises

  • KeyError: the value is not unique
  • KeyError: the value is not assigned

Examples

>>> nspace = MavisNamespace(thing=1, otherthing=2)
>>> nspace.reverse(1)
'thing'

mavis.constants.MavisNamespace.type()

returns the type of the given attribute

def type(self, attr, *pos):

Args

  • attr

Examples

>>> nspace = MavisNamespace(thing=1, otherthing=2)
>>> nspace.type('thing')
<class 'int'>

mavis.constants.MavisNamespace.define()

Get the definition of a given attribute or return a default (when given) if the attribute does not exist

def define(self, attr, *pos):

Args

  • attr

Returns

  • str: definition for the attribute

Raises

  • KeyError: the attribute does not exist and a default was not given

Examples

>>> nspace = MavisNamespace()
>>> nspace.add('thing', 1, defn='I am a thing')
>>> nspace.add('otherthing', 2)
>>> nspace.define('thing')
'I am a thing'
>>> nspace.define('otherthing')
Traceback (most recent call last):
....
>>> nspace.define('otherthing', 'I am some other thing')
'I am some other thing'

mavis.constants.MavisNamespace.add()

Add an attribute to the name space

def add(
    self,
    attr,
    value,
    defn=None,
    cast_type=None,
    nullable=False,
    env_overwritable=False,
    listable=False,
):

Args

  • attr (str): name of the attribute being added
  • value: the value of the attribute
  • defn (str): the definition, will be used in generating documentation and help menus
  • cast_type (Callable): the function to use in casting the value
  • nullable (bool): True if this attribute can have a None value
  • env_overwritable (bool): True if this attribute will be overridden by its environment variable equivalent
  • listable (bool): True if this attribute can have multiple values

Examples

>>> nspace = MavisNamespace()
>>> nspace.add('thing', 1, 'I am a thing', int)
>>> nspace = MavisNamespace()
>>> nspace.add('thing', 1, cast_type=int)
>>> nspace = MavisNamespace()
>>> nspace.add('thing', 1)
>>> nspace = MavisNamespace()
>>> nspace.add('thing', value=1, cast_type=int, defn='I am a thing')

mavis.constants.float_fraction()

cast input to a float and check that it is between 0 and 1

def float_fraction(num):

Args

  • num: input to cast

Returns

  • float

Raises

  • TypeError: if the input cannot be cast to a float or the number is not between 0 and 1

mavis.constants.reverse_complement()

wrapper for the Bio.Seq reverse_complement method

def reverse_complement(s):

Args

  • s (str): the input DNA sequence

Returns

  • str: the reverse complement of the input sequence

Examples

>>> reverse_complement('ATCCGGT')
'ACCGGAT'

Warning

assumes the input is a DNA sequence

mavis.constants.translate()

given a DNA sequence, translates it and returns the protein amino acid sequence

def translate(s, reading_frame=0):

Args

  • s (str): the input DNA sequence
  • reading_frame (int): where to start translating the sequence

Returns

  • str: the amino acid sequence