In [1]:
import os
import csv, sys
import io, textwrap, itertools
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
# Raise the csv module's maximum allowed field size to sys.maxsize so that
# very long fields are not rejected by the csv parser.
csv.field_size_limit(sys.maxsize)

# One-letter residue codes accepted as "common" amino acids: 21 symbols in total.
# NOTE(review): presumably the 20 standard residues plus 'X' for an unknown
# residue -- confirm against the code that consumes this set.
common_amino_acid_set = set("ACDEFGHIKLMNPQRSTVWXY")


def fasta_reader(handle, width=None):
    """
    Lazily reads a FASTA file, yielding one (header, sequence) pair per record.
    Records are produced one at a time, so the function is suitable for large files.

    args:
        :handle (str, pathlib.Path, or text file object) - fasta to read from. Any
                 `io.TextIOBase` instance (an opened text file, `io.StringIO`, ...) is
                 read directly; anything else is passed to `open(handle, 'r')`.
        :width (int or None) - formats the sequence to have max `width` character per line.
                               If <= 0, processed as None. If None, there is no max width.
    yields:
        :(header, sequence) tuples. `header` is the stripped header line, including its
         leading ">". `sequence` is the record's stripped lines joined together, with
         any trailing "*" removed. A header that has no sequence lines is yielded with
         an empty sequence. Sequence text that appears before the first header is
         yielded with `header` set to None.
    raises:
        :any exception raised by `open` or while reading the handle (for example
         FileNotFoundError or UnicodeDecodeError) propagates unchanged.
    returns:
        :None

    The handle is closed when the generator finishes or is closed, including when
    the handle was supplied by the caller.
    """
    FASTA_STOP_CODON = "*"

    # Accept every text-mode file object, not only io.TextIOWrapper.
    if not isinstance(handle, io.TextIOBase):
        handle = open(handle, 'r')
    width = width if isinstance(width, int) and width > 0 else None
    try:
        # Header that has been read but whose sequence has not been yielded yet.
        pending_header = None
        for is_header, group in itertools.groupby(handle, lambda line: line.startswith(">")):
            if is_header:
                # groupby merges consecutive header lines into one group; every
                # header except the last one in the group has an empty sequence.
                for line in group:
                    if pending_header is not None:
                        yield pending_header, ''
                    pending_header = line.strip()
            else:
                seq = ''.join(line.strip() for line in group).strip().rstrip(FASTA_STOP_CODON)
                if width is not None:
                    # Fixed-width slicing: textwrap would break early at hyphens
                    # (alignment gaps) and whitespace, giving uneven lines.
                    seq = '\n'.join(seq[i:i + width] for i in range(0, len(seq), width))
                yield pending_header, seq
                pending_header = None
        # A header on the last line(s) of the file with no sequence after it.
        if pending_header is not None:
            yield pending_header, ''
    finally:
        # No `except ...: raise StopIteration` here: inside a generator that is
        # turned into RuntimeError (PEP 479) and would hide the real error.
        if not handle.closed:
            handle.close()

In [9]:
import os
# For every directory in `name_list`, count the records in each of its .fasta
# files and print how many records were lost relative to the "ori" directory:
#     rate = (ori_total - cnt) * 100 / ori_total
# NOTE(review): the "40%" ... "90%" directories presumably hold the same data set
# filtered at different thresholds -- confirm what produced them.
ori_total = 0
name_list = ["ori", "40%", "50%", "60%", "70%", "80%", "90%"]
for name in name_list:
    # sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
    # output (and which file sets ori_total) deterministic.
    for filename in sorted(os.listdir(name)):
        if not filename.endswith(".fasta"):
            continue

        cnt = sum(1 for _ in fasta_reader(os.path.join(name, filename)))
        if name == "ori":
            # Baseline record count that all later directories are compared to.
            ori_total = cnt
        if ori_total > 0:
            rate = (ori_total - cnt) * 100 / ori_total
        else:
            # No baseline available (empty or missing "ori" file): report nan
            # instead of raising ZeroDivisionError.
            rate = float("nan")
        print("%s, cnt: %d, rate: %0.2f%%" % (name, cnt, rate))
        
    

ori, cnt: 214193, rate: 0.00%
40%, cnt: 5518, rate: 97.42%
50%, cnt: 15637, rate: 92.70%
60%, cnt: 32677, rate: 84.74%
70%, cnt: 57430, rate: 73.19%
80%, cnt: 89714, rate: 58.12%
90%, cnt: 134229, rate: 37.33%
