我是 Python 初学者,需要一些帮助,想从一个 multi-FASTA 文件(包含多条序列的 FASTA 文件)中获取以下详细信息:
我似乎连 ORF 的查找都无法正确完成 :')。非常感谢大家提供的任何帮助。
我尝试列出每条序列中的 ORF 并对它们进行计数:首先构建一个字典,字典的键是以“>”开头的序列描述/标识符,值是序列本身。
我的代码:
def findcodon(file, frame):
    """Print every ORF found in one reading frame of each sequence in *file*.

    An ORF is reported from the first in-frame start codon (ATG) up to and
    including the next in-frame stop codon (TGA, TAG or TAA). Codon matching
    is case-insensitive; the printed ORF keeps the sequence's original case.

    Args:
        file: Path handed to ``makedictionary``, which is expected to return
            a dict mapping each '>' header to its sequence string.
            NOTE(review): ``makedictionary`` is defined outside this block;
            confirm its return shape.
        frame: 0-based index of the first base of the first codon. Codons are
            read at ``frame, frame + 3, frame + 6, ...``.

    Returns:
        None. Each ORF is printed as it is found, followed by one final line
        with the total count across all sequences.
    """
    # The original also opened `file` here without ever reading or closing
    # the handle; makedictionary() does the actual reading.
    dictionary = makedictionary(file)
    # The original list held 'TGA' twice and never matched upper-case 'TAA'.
    stopcodon = {'TGA', 'TAG', 'TAA'}
    startcodon = 'ATG'
    orfcount = 0
    for seq in dictionary.values():
        # Reset per sequence so a start codon seen in a previous sequence
        # cannot be paired with a stop codon in this one.
        startpos = None
        for j in range(frame, len(seq), 3):
            codon = seq[j:j + 3].upper()
            if codon == startcodon:
                # Keep the first start; later in-frame ATGs are internal
                # codons of the same ORF.
                if startpos is None:
                    startpos = j
            elif codon in stopcodon and startpos is not None:
                # Slice the sequence itself (the original sliced the single
                # character seq[j], which always produced an empty string).
                orf = seq[startpos:j + 3]
                orfcount += 1
                print('orf', orfcount, ':', orf)
                startpos = None
    print('total no.of orfs:', orfcount)
# Run the ORF search on 'myseq.txt', reading codons from index 1 onward.
findcodon('myseq.txt',1)
我的输出:
orf 1 :
total no.of orfs: 1
orf 2 :
total no.of orfs: 2
orf 3 :
total no.of orfs: 3
orf 4 :
total no.of orfs: 4
orf 5 :
total no.of orfs: 5
_START_CODON = 'atg'
_STOP_CODONS = frozenset({'tga', 'tag', 'taa'})


def _read_fasta(file):
    """Parse a FASTA file into a dict mapping each '>' header line to its sequence."""
    records = {}
    identifier = ''
    chunks = []
    with open(file, 'r') as f:
        for line in f:
            line = line.strip()
            if line.startswith('>'):
                if identifier:
                    records[identifier] = ''.join(chunks)
                identifier = line
                # Also discards any text that appeared before the first header.
                chunks = []
            else:
                chunks.append(line)
    # Flush the last record, which has no following header to trigger it.
    if identifier:
        records[identifier] = ''.join(chunks)
    return records


def _iter_orfs(sequence, offset):
    """Yield (start, end) index pairs of ORFs in the frame beginning at *offset*."""
    start_pos = None
    for j in range(offset, len(sequence), 3):
        codon = sequence[j:j + 3].lower()
        if codon == _START_CODON:
            # Keep the first start codon: a later in-frame ATG is an internal
            # codon of the same ORF and must not shorten it.
            if start_pos is None:
                start_pos = j
        elif codon in _STOP_CODONS and start_pos is not None:
            yield start_pos, j + 3
            start_pos = None


def find_codon(file, frame):
    """Print the ORFs of every sequence in a multi-FASTA file.

    For each record the first ``frame`` forward reading frames (offsets
    ``0 .. frame - 1``) are scanned. An ORF runs from the first in-frame
    start codon (ATG) through the next in-frame stop codon (TGA, TAG, TAA),
    stop codon included. Matching is case-insensitive; reported positions
    are 0-based indices into the sequence.

    Args:
        file: Path of the FASTA file to read.
        frame: Number of reading frames to scan, starting at offset 0.
            Values above 3 revisit codon positions already scanned.

    Returns:
        None. For each record, every ORF is printed, then the ORF count and,
        when at least one ORF exists, the longest ORF with its position.

    Raises:
        OSError: If *file* cannot be opened.
    """
    for sequence in _read_fasta(file).values():
        orf_count = 0
        longest_orf = ('', '', 0)  # (ORF, position, length)
        for offset in range(frame):
            for start_pos, end_pos in _iter_orfs(sequence, offset):
                orf_count += 1
                orf = sequence[start_pos:end_pos]
                orf_length = end_pos - start_pos
                # Strict '>' keeps the earliest ORF when lengths tie.
                if orf_length > longest_orf[2]:
                    longest_orf = (orf, start_pos, orf_length)
                print(f'ORF {orf_count}: {orf} at position {start_pos}')
        print(f'Total number of ORFs: {orf_count}')
        if longest_orf[0]:
            print(f'Longest ORF: {longest_orf[0]} at position {longest_orf[1]}')
# Example usage: scan the reading frames at offsets 0, 1 and 2 of every
# record in 'example_fasta.fasta'.
find_codon('example_fasta.fasta', 3)