\
All sequence and alignment objects have a molecular type, or MolType, which provides key properties for validating sequence characters. Here we use the DNA MolType to create a DNA sequence.
>>> from cogent import DNA
>>> my_seq = DNA.makeSequence("AGTACACTGGT")
>>> my_seq
DnaSequence(AGTACAC... 11)
>>> print my_seq
AGTACACTGGT
>>> str(my_seq)
'AGTACACTGGT'
>>> from cogent import RNA
>>> rnaseq = RNA.makeSequence('ACGUACGUACGUACGU')
>>> from cogent import DNA
>>> my_seq = DNA.makeSequence('AGTACACTGGT')
>>> print my_seq.toFasta()
>0
AGTACACTGGT
>>> from cogent import RNA
>>> rnaseq = RNA.makeSequence('ACGUACGUACGUACGU')
>>> rnaseq.toFasta()
'>0\nACGUACGUACGUACGU'
>>> from cogent import DNA
>>> my_seq = DNA.makeSequence('AGTACACTGGT','my_gene')
>>> my_seq
DnaSequence(AGTACAC... 11)
>>> type(my_seq)
<class 'cogent.core.sequence.DnaSequence'>
>>> from cogent import DNA
>>> my_seq = DNA.makeSequence('AGTACACTGGT')
>>> my_seq.Name = 'my_gene'
>>> print my_seq.toFasta()
>my_gene
AGTACACTGGT
>>> from cogent import DNA
>>> my_seq = DNA.makeSequence("AGTACACTGGT")
>>> print my_seq.complement()
TCATGTGACCA
>>> print my_seq.reversecomplement()
ACCAGTGTACT
The rc method name is easier to type and gives the same result as reversecomplement.
>>> print my_seq.rc()
ACCAGTGTACT
>>> from cogent import DNA
>>> my_seq = DNA.makeSequence('GCTTGGGAAAGTCAAATGGAA','protein-X')
>>> pep = my_seq.getTranslation()
>>> type(pep)
<class 'cogent.core.sequence.ProteinSequence'>
>>> print pep.toFasta()
>protein-X
AWESQME
>>> from cogent import DNA
>>> my_seq = DNA.makeSequence('ACGTACGTACGTACGT')
>>> print my_seq.toRna()
ACGUACGUACGUACGU
>>> from cogent import RNA
>>> rnaseq = RNA.makeSequence('ACGUACGUACGUACGU')
>>> print rnaseq.toDna()
ACGTACGTACGTACGT
>>> from cogent import DNA
>>> a = DNA.makeSequence("AGTACACTGGT")
>>> a.canPair(a.complement())
False
>>> a.canPair(a.reversecomplement())
True
>>> from cogent import DNA
>>> my_seq = DNA.makeSequence("AGTACACTGGT")
>>> extra_seq = DNA.makeSequence("CTGAC")
>>> long_seq = my_seq + extra_seq
>>> long_seq
DnaSequence(AGTACAC... 16)
>>> str(long_seq)
'AGTACACTGGTCTGAC'
>>> my_seq[1:6]
DnaSequence(GTACA)
We’ll do this by specifying the position indices of interest, creating a sequence Feature and using that to extract the positions.
>>> from cogent import DNA
>>> seq = DNA.makeSequence('ATGATGATGATG')
Create the position indices. Note that we start at index 2 (the first codon’s 3rd position) and represent each position as a span (i -- i+1).
>>> indices = [(i, i+1) for i in range(len(seq))[2::3]]
Create the sequence feature and use it to slice the sequence.
>>> pos3 = seq.addFeature('pos3', 'pos3', indices)
>>> pos3 = pos3.getSlice()
>>> assert str(pos3) == 'GGGG'
The only difference from the example above is that each span covers 2 positions.
>>> from cogent import DNA
>>> seq = DNA.makeSequence('ATGATGATGATG')
>>> indices = [(i, i+2) for i in range(len(seq))[::3]]
>>> pos12 = seq.addFeature('pos12', 'pos12', indices)
>>> pos12 = pos12.getSlice()
>>> assert str(pos12) == 'ATATATAT'
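The shuffled order presumably differs from run to run, so the output shown is illustrative only.
>>> print rnaseq.shuffle() # doctest: +SKIP
ACAACUGGCUCUGAUG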
>>> from cogent import RNA
>>> s = RNA.makeSequence('--AUUAUGCUAU-UAu--')
>>> print s.degap()
AUUAUGCUAUUAU