import sys
import os
import logging
import argparse
def parse_arguments(program_version, arguments = sys.argv[1:]):
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
description="""SVIM (pronounced SWIM) is a structural variant caller for long reads.
It discriminates six different variant classes: deletions, tandem and interspersed duplications,
inversions, insertions and translocations. SVIM is unique in its capability of extracting both the genomic origin and
destination of duplications.
SVIM consists of four major steps:
- COLLECT detects signatures for SVs in long read alignments
- CLUSTER merges signatures that come from the same SV
- COMBINE combines clusters from different genomic regions and classifies them into distinct SV types
- GENOTYPE uses alignments spanning SVs to determine their genotype
SVIM can process two types of input. Firstly, it can detect SVs from raw reads by aligning them to a given reference genome first ("SVIM.py reads [options] working_dir reads genome").
Alternatively, it can detect SVs from existing reads alignments in SAM/BAM format ("SVIM.py alignment [options] working_dir bam_file").
""")
subparsers = parser.add_subparsers(help='modes', dest='sub')
parser.add_argument('--version',
'-v',
action='version',
version='%(prog)s {version}'.format(version=program_version))
parser_fasta = subparsers.add_parser('reads',
help='Detect SVs from raw reads. Align reads to given reference genome first.')
parser_fasta.add_argument('working_dir',
type=str,
help='Working and output directory. \
Existing files in the directory are overwritten. \
If the directory does not exist, it is created.')
parser_fasta.add_argument('reads',
type=str,
help='Read file (FASTA, FASTQ, gzipped FASTA, gzipped FASTQ or file list). \
The read file has to have one of the following supported file endings: \
FASTA: .fa, .fasta, .FA, .fa.gz, .fa.gzip, .fasta.gz, .fasta.gzip \
FASTQ: .fq, .fastq, .FQ, .fq.gz, .fq.gzip, .fastq.gz, .fastq.gzip \
FILE LIST: .fa.fn, fq.fn')
parser_fasta.add_argument('genome',
type=str,
help='Reference genome file (FASTA)')
parser_fasta.add_argument('--verbose',
action='store_true',
help='Enable more verbose logging (default: %(default)s)')
group_fasta_align = parser_fasta.add_argument_group('ALIGN')
group_fasta_align.add_argument('--cores',
type=int,
default=1,
help='CPU cores to use for the alignment (default: %(default)s)')
group_fasta_align.add_argument('--aligner',
type=str,
default="ngmlr",
choices=["ngmlr", "minimap2"],
help='Tool for read alignment: ngmlr or minimap2 (default: %(default)s)')
group_fasta_align.add_argument('--nanopore',
action='store_true',
help='Use Nanopore settings for read alignment (default: %(default)s)')
group_fasta_collect = parser_fasta.add_argument_group('COLLECT')
group_fasta_collect.add_argument('--min_mapq',
type=int,
default=20,
help='Minimum mapping quality of reads to consider (default: %(default)s). \
Reads with a lower mapping quality are ignored.')
group_fasta_collect.add_argument('--min_sv_size',
type=int,
default=40,
help='Minimum SV size to detect (default: %(default)s). \
SVIM can potentially detect events of any size but is limited by the \
signal-to-noise ratio in the input alignments. That means that more \
accurate reads and alignments enable the detection of smaller events. \
For current PacBio or Nanopore data, we would recommend a minimum size \
of 40bp or larger.')
group_fasta_collect.add_argument('--max_sv_size',
type=int,
default=100000,
help='Maximum SV size to detect (default: %(default)s). \
This parameter is used to distinguish long deletions (and inversions) from \
translocations which cannot be distinguished from the alignment alone. \
Split read segments mapping far apart on the reference could either \
indicate a very long deletion (inversion) or a translocation breakpoint. \
SVIM calls a translocation breakpoint if the mapping distance is larger \
than this parameter and a deletion (or inversion) if it is smaller or equal.')
group_fasta_collect.add_argument('--segment_gap_tolerance',
type=int,
default=10,
help='Maximum tolerated gap between adjacent alignment segments (default: %(default)s). \
This parameter applies to gaps on the reference and the read. Example: \
Deletions are detected from two subsequent segments of a split read that are mapped \
far apart from each other on the reference. The segment gap tolerance determines \
the maximum tolerated length of the read gap between both segments. If there is an \
unaligned read segment larger than this value between the two segments, no deletion is called.')
group_fasta_collect.add_argument('--segment_overlap_tolerance',
type=int,
default=5,
help='Maximum tolerated overlap between adjacent alignment segments (default: %(default)s). \
This parameter applies to overlaps on the reference and the read. Example: \
Deletions are detected from two subsequent segments of a split read that are mapped \
far apart from each other on the reference. The segment overlap tolerance determines \
the maximum tolerated length of an overlap between both segments on the read. If the \
overlap between the two segments on the read is larger than this value, no deletion is called.')
group_fasta_collect.add_argument('--all_bnds',
action='sto
没有合适的资源?快使用搜索试试~ 我知道了~
svim:长读的结构变异识别方法
共32个文件
py:25个
yml:1个
rst:1个
需积分: 50 2 下载量 10 浏览量
2021-05-15
08:56:34
上传
评论
收藏 191KB ZIP 举报
温馨提示
SVIM:基于长读长的结构变异识别。SVIM(发音为 SWIM)是一款面向长读长测序数据的结构变异检测程序。它能够检测并区分以下六类结构变异:缺失、插入、倒位、串联重复、散布重复和易位。SVIM 还能估计缺失、插入、倒位和散布重复的基因型。与其他方法不同,SVIM 整合了来自整个基因组的信息,以精确区分相似的事件,例如串联重复、散布重复和简单插入。在我们对来自 PacBio 和 Nanopore 测序仪的模拟数据和真实数据集进行的实验中,SVIM 始终比同类方法取得更好的结果。注意!要分析单倍体或二倍体的基因组组装或重叠群(contig),请使用其他方法。 结构变异和长读长的背景 结构变异(SV)通常定义为大于 50 bp 的基因组变异(例如缺失、重复、倒位)。研究表明,与 SNP 或小的 indel 相比,它们在一个普通基因组中影响的碱基更多。因此,它们对基因和调控区域具有很大的影响。这反映在大量与 SV 相关的遗传性疾病和其
资源详情
资源评论
资源推荐
收起资源包目录
svim-master.zip (32个子文件)
svim-master
.travis.yml 192B
README.rst 7KB
docs
SVclasses.png 135KB
LICENSE 34KB
src
svim
SVIM_COLLECT.py 7KB
SVIM_genotyping.py 5KB
SVIM_clustering.py 23KB
svim 12KB
SVIM_COMBINE.py 30KB
SVIM_input_parsing.py 44KB
SVIM_intra.py 2KB
__init__.py 0B
SVIM_alignment.py 3KB
SVSignature.py 13KB
SVIM_plot.py 6KB
SVIM_inter.py 23KB
SVCandidate.py 37KB
SVIM_CLUSTER.py 6KB
SVIM_merging.py 10KB
tests
test_consensus.py 2KB
test_Collect.py 14KB
test_satag.py 2KB
test_input_parsing.py 1KB
chimeric_read.sam 49KB
__init__.py 0B
test_inter.py 455B
test_SVCandidate.py 5KB
test_Signature.py 1KB
test_intra.py 994B
test_clustering.py 3KB
setup.py 2KB
.gitignore 1KB
共 32 条
- 1
weixin_42135073
- 粉丝: 29
- 资源: 4783
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
评论0