import os#载入os标准库 from Bio.Blast import NCBIXML#载入biopython的xml文件解析第三方库
def get_fasta_xml(path):
    """Find XML files that share a base name with a FASTA file.

    Walks ``path`` recursively. A file ending in ``.xml`` is selected when
    some file ending in ``.fasta`` anywhere under ``path`` has the same base
    name (the file name with its extension removed).

    Args:
        path: Directory to search.

    Returns:
        A list of paths to the matching ``.xml`` files, in the order
        ``os.walk`` visits them. Each entry is joined onto the directory it
        was found in, so it can be opened regardless of the current working
        directory or of how deep in the tree the file lives.
    """
    fasta_stems = set()
    xml_candidates = []  # (base name, full path) pairs, kept in walk order

    for root, _directories, files in os.walk(path):
        for file_name in files:
            stem, _ = os.path.splitext(file_name)
            if file_name.endswith('.fasta'):
                fasta_stems.add(stem)
            elif file_name.endswith('.xml'):
                # Require the dot: a bare 'xml' suffix would also accept
                # unrelated extensions such as '.bxml'.
                xml_candidates.append((stem, os.path.join(root, file_name)))

    # Filter only after the walk has finished, because the matching FASTA
    # file may be visited later than the XML file it belongs to.
    return [
        xml_path
        for stem, xml_path in xml_candidates
        if stem in fasta_stems
    ]
# E-value cutoff: prompt once on the console, then convert the reply to a
# float. analysis_xml reports only HSPs whose E-value is below this number.
_e_value_text = input("输入E值:")
E_VALUE_THRESH = float(_e_value_text)
def analysis_xml(xml_list):
    """Print the BLAST hits whose E-value is below the threshold.

    Each file is parsed with ``NCBIXML.read``. For every HSP whose ``expect``
    is below the module-level ``E_VALUE_THRESH``, the file name, alignment
    title, alignment length, E-value and the first 75 characters of the
    query, match and subject strings are printed.

    Args:
        xml_list: Iterable of BLAST XML file names or paths to open.

    Returns:
        None. Results are written to standard output.
    """
    for filename in xml_list:
        # Parse inside a context manager so the handle is closed as soon as
        # the record has been read, instead of leaking one handle per file.
        with open(filename) as result_handle:
            blast_record = NCBIXML.read(result_handle)

        for alignment in blast_record.alignments:
            for hsp in alignment.hsps:
                if hsp.expect < E_VALUE_THRESH:  # keep significant hits only
                    print('****' + filename + '****')
                    print('****Alignment****')
                    print('sequence:', alignment.title)
                    print('length:', alignment.length)
                    print('e_value:', hsp.expect)
                    # Preview only the first 75 characters of each string.
                    print(hsp.query[0:75] + '...')
                    print(hsp.match[0:75] + '...')
                    print(hsp.sbjct[0:75] + '...')
                    print('\n')
if __name__ == '__main__':
    # Search the current working directory. os.curdir replaces the
    # Windows-only ".\\" literal, which on POSIX systems names a directory
    # that does not exist, so os.walk would silently yield nothing.
    analysis_xml(get_fasta_xml(os.curdir))