Add protein-translation exercise (#165)

This commit is contained in:
Erik Schierboom
2024-05-01 01:24:34 +02:00
committed by GitHub
parent e8636a4e39
commit 0f78f34848
8 changed files with 525 additions and 0 deletions

View File

@@ -372,6 +372,14 @@
"practices": [],
"prerequisites": [],
"difficulty": 3
},
{
"slug": "protein-translation",
"name": "Protein Translation",
"uuid": "28d72a65-843a-4c31-bf74-c6e4f52a598e",
"practices": [],
"prerequisites": [],
"difficulty": 3
}
]
},

View File

@@ -0,0 +1,45 @@
# Instructions
Translate RNA sequences into proteins.
RNA can be broken into three nucleotide sequences called codons, and then translated to a polypeptide like so:
RNA: `"AUGUUUUCU"` => translates to
Codons: `"AUG", "UUU", "UCU"`
=> which become a polypeptide with the following sequence =>
Protein: `"Methionine", "Phenylalanine", "Serine"`
There are 64 codons which in turn correspond to 20 amino acids; however, not all of the codon sequences and resulting amino acids are important in this exercise.
If it works for one codon, the program should work for all of them.
However, feel free to expand the list in the test suite to include them all.
There are also three terminating codons (also known as 'STOP' codons); if any of these codons are encountered (by the ribosome), all translation ends and the protein is terminated.
All subsequent codons are ignored, like this:
RNA: `"AUGUUUUCUUAAAUG"` =>
Codons: `"AUG", "UUU", "UCU", "UAA", "AUG"` =>
Protein: `"Methionine", "Phenylalanine", "Serine"`
Note the stop codon `"UAA"` terminates the translation and the final methionine is not translated into the protein sequence.
Below are the codons and resulting Amino Acids needed for the exercise.
| Codon | Protein |
| :----------------- | :------------ |
| AUG | Methionine |
| UUU, UUC | Phenylalanine |
| UUA, UUG | Leucine |
| UCU, UCC, UCA, UCG | Serine |
| UAU, UAC | Tyrosine |
| UGU, UGC | Cysteine |
| UGG | Tryptophan |
| UAA, UAG, UGA | STOP |
Learn more about [protein translation on Wikipedia][protein-translation].
[protein-translation]: https://en.wikipedia.org/wiki/Translation_(biology)

View File

@@ -0,0 +1,18 @@
{
"authors": [
"erikschierboom"
],
"files": {
"solution": [
"protein-translation.8th"
],
"test": [
"test.8th"
],
"example": [
".meta/example.8th"
]
},
"blurb": "Translate RNA sequences into proteins.",
"source": "Tyler Long"
}

View File

@@ -0,0 +1,24 @@
\ Lookup table from a three-letter codon to the name of the protein it encodes.
\ The three terminating codons (UAA, UAG, UGA) map to the marker string "STOP".
{
"AUG": "Methionine",
"UUU": "Phenylalanine", "UUC": "Phenylalanine",
"UUA": "Leucine", "UUG": "Leucine",
"UCU": "Serine", "UCC": "Serine", "UCA": "Serine", "UCG": "Serine",
"UAU": "Tyrosine", "UAC": "Tyrosine",
"UGU": "Cysteine", "UGC": "Cysteine",
"UGG": "Tryptophan",
"UAA": "STOP", "UAG": "STOP", "UGA": "STOP"
} constant codon-protein-map
\ Translate a single codon string into its protein name (or "STOP") by
\ handing codon-protein-map and the codon to `caseof`.
\ NOTE(review): >proteins tests this word's result with null?, so an unknown
\ codon presumably yields null here; confirm against the `caseof` documentation.
: >protein \ s -- s
codon-protein-map swap caseof
;
\ Break an RNA string into an array of codon strings.
\ The regex matches runs of one to three alphabetic characters, so a trailing
\ fragment shorter than three letters can still be matched as a short chunk.
\ NOTE(review): assumes r:+/ returns the matched pieces in order; confirm.
: >codons \ s -- a
/[[:alpha:]]{1,3}/ r:+/
;
\ Translate an RNA string into an array of protein names.
\ A fresh result array is created and the codons from >codons are walked:
\ - a codon whose >protein result is null replaces the result with null and stops
\ - a codon that maps to "STOP" is discarded and stops the walk
\ - any other protein name is pushed onto the result array
\ The word can therefore leave null instead of an array for untranslatable input.
\ NOTE(review): relies on a:each! passing only the element (no index) to the
\ quotation and leaving the iterated array for the final drop; confirm.
: >proteins \ s -- a
>codons a:new swap
( >protein null? if nip break ;then
dup "STOP" s:= if drop break else a:push then ) a:each! drop
;

View File

@@ -0,0 +1,102 @@
# This is an auto-generated file.
#
# Regenerating this file via `configlet sync` will:
# - Recreate every `description` key/value pair
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
# - Preserve any other key/value pair
#
# As user-added comments (using the # character) will be removed when this file
# is regenerated, comments can be added via a `comment` key.
[2c44f7bf-ba20-43f7-a3bf-f2219c0c3f98]
description = "Empty RNA sequence results in no proteins"
[96d3d44f-34a2-4db4-84cd-fff523e069be]
description = "Methionine RNA sequence"
[1b4c56d8-d69f-44eb-be0e-7b17546143d9]
description = "Phenylalanine RNA sequence 1"
[81b53646-bd57-4732-b2cb-6b1880e36d11]
description = "Phenylalanine RNA sequence 2"
[42f69d4f-19d2-4d2c-a8b0-f0ae9ee1b6b4]
description = "Leucine RNA sequence 1"
[ac5edadd-08ed-40a3-b2b9-d82bb50424c4]
description = "Leucine RNA sequence 2"
[8bc36e22-f984-44c3-9f6b-ee5d4e73f120]
description = "Serine RNA sequence 1"
[5c3fa5da-4268-44e5-9f4b-f016ccf90131]
description = "Serine RNA sequence 2"
[00579891-b594-42b4-96dc-7ff8bf519606]
description = "Serine RNA sequence 3"
[08c61c3b-fa34-4950-8c4a-133945570ef6]
description = "Serine RNA sequence 4"
[54e1e7d8-63c0-456d-91d2-062c72f8eef5]
description = "Tyrosine RNA sequence 1"
[47bcfba2-9d72-46ad-bbce-22f7666b7eb1]
description = "Tyrosine RNA sequence 2"
[3a691829-fe72-43a7-8c8e-1bd083163f72]
description = "Cysteine RNA sequence 1"
[1b6f8a26-ca2f-43b8-8262-3ee446021767]
description = "Cysteine RNA sequence 2"
[1e91c1eb-02c0-48a0-9e35-168ad0cb5f39]
description = "Tryptophan RNA sequence"
[e547af0b-aeab-49c7-9f13-801773a73557]
description = "STOP codon RNA sequence 1"
[67640947-ff02-4f23-a2ef-816f8a2ba72e]
description = "STOP codon RNA sequence 2"
[9c2ad527-ebc9-4ace-808b-2b6447cb54cb]
description = "STOP codon RNA sequence 3"
[f4d9d8ee-00a8-47bf-a1e3-1641d4428e54]
description = "Sequence of two protein codons translates into proteins"
[dd22eef3-b4f1-4ad6-bb0b-27093c090a9d]
description = "Sequence of two different protein codons translates into proteins"
[d0f295df-fb70-425c-946c-ec2ec185388e]
description = "Translate RNA strand into correct protein list"
[e30e8505-97ec-4e5f-a73e-5726a1faa1f4]
description = "Translation stops if STOP codon at beginning of sequence"
[5358a20b-6f4c-4893-bce4-f929001710f3]
description = "Translation stops if STOP codon at end of two-codon sequence"
[ba16703a-1a55-482f-bb07-b21eef5093a3]
description = "Translation stops if STOP codon at end of three-codon sequence"
[4089bb5a-d5b4-4e71-b79e-b8d1f14a2911]
description = "Translation stops if STOP codon in middle of three-codon sequence"
[2c2a2a60-401f-4a80-b977-e0715b23b93d]
description = "Translation stops if STOP codon in middle of six-codon sequence"
[1e75ea2a-f907-4994-ae5c-118632a1cb0f]
description = "Non-existing codon can't translate"
include = false
[9eac93f3-627a-4c90-8653-6d0a0595bc6f]
description = "Unknown amino acids, not part of a codon, can't translate"
reimplements = "1e75ea2a-f907-4994-ae5c-118632a1cb0f"
[9d73899f-e68e-4291-b1e2-7bf87c00f024]
description = "Incomplete RNA sequence can't translate"
[43945cf7-9968-402d-ab9f-b8a28750b050]
description = "Incomplete RNA sequence can translate if valid until a STOP codon"

View File

@@ -0,0 +1,173 @@
needs console/loaded
\ -----------------------------------------------------------------
\ All harness words and state below are defined in the "test" namespace.
ns: test
\ Planned number of tests; stays at -1 until `tests` stores the real count.
-1 var, test-count
\ Running tallies, incremented by test-passed, test-failed and test-skipped.
var tests-passed
var tests-failed
var tests-skipped
\ While true every test is executed; SKIP-REST-OF-TESTS sets it to false.
true var, run-test
\ Some utility words
\ Record a passing test.
\ Discards the expected and actual values, increments tests-passed and prints
\ the test name followed by " ... OK" in green, then restores white on black.
: test-passed \ s x x -- \\ test name, expected value, actual value
2drop
1 tests-passed n:+!
con:green con:onBlack . space " ... OK" . con:white con:onBlack cr
;
\ Record a skipped test.
\ Increments tests-skipped and prints the test name followed by
\ " ... SKIPPED" in cyan, then restores white on black.
: test-skipped \ s --
1 tests-skipped n:+!
con:cyan con:onBlack . space " ... SKIPPED" . con:white con:onBlack cr
;
\ Record a failing test and report what went wrong.
\ Increments tests-failed, prints the test name with " ... FAIL" in red, then
\ prints the actual value followed by the expected value between guillemets.
: test-failed \ s x x -- \\ test name, expected value, actual value
1 tests-failed n:+!
\ bring the test name to the top so it is printed first
rot
con:red con:onBlack . space " ... FAIL" . con:white con:onBlack cr
\ actual is now on top of the stack, expected is beneath it
" Actual: «" . . "»" . cr
" Expected: «" . . "»" . cr cr
;
\ Leaves x in place and pushes true when the kind of x equals ns:w,
\ i.e. when x is a word; pushes false otherwise.
: isword? \ x -- x f
dup >kind ns:w n:=
;
\ Decide whether the next test should be executed.
\ Returns true while run-test is set. Once run-test is false (see
\ SKIP-REST-OF-TESTS) the answer comes from the RUN_ALL_TESTS environment
\ variable, converted to a boolean with n:>bool.
: run-test? \ -- T
run-test @ if true else "RUN_ALL_TESTS" getenv n:>bool then
;
\ True when every planned test has been accounted for: the passed, skipped
\ and failed tallies must add up to the planned count held in test-count.
: all-tests-run? \ -- T
test-count @
tests-passed @ tests-skipped @ n:+
tests-failed @ n:+
n:=
;
\ Leaves x in place and pushes true if the kind of x equals ns:d (a date),
\ false otherwise.
: date? \ x -- x T
dup >kind ns:d n:=
;
\ adapted from 8th forum -- https://8th-dev.com/forum/index.php/topic,2745.0.html
\ Generic equality used by equal?: two items are equal only if they are of the
\ same kind and the comparator for that kind says so. Arrays and maps are
\ compared with eq? itself as the element comparator, so nesting is handled.
: eq? \ x x -- T
\ are the items the same kind?
2dup >kind swap >kind n:=
!if 2drop false ;then
\ same kind: try different comparators
number? if n:= ;then
string? if s:= ;then
\ a:= and m:= are followed by 2nip to remove the two containers under the flag
array? if ' eq? a:= 2nip ;then
map? if ' eq? m:= 2nip ;then
date? if d:= ;then
\ otherwise fall back to 'lazy evaluation'
l: =
;
\ Approximate equality used by approx_equal?.
\ Takes a tolerance n and two values; returns true only when both values are
\ numbers of the same kind and n:~= considers them equal within n.
\ Every path consumes all three inputs so that only the flag is left behind.
: eps_eq? \ n x x -- T
\ are the items the same kind?
2dup >kind swap >kind n:=
\ kinds differ: discard both values AND the tolerance, not just the values
!if 3drop false ;then
\ same kind but not numeric: again discard all three inputs
number? !if 3drop false ;then
\ bring the tolerance to the top, as the third operand of n:~=
rot n:~=
;
\ Sanity check on the data stack after a test word has run.
\ n is the number of items the caller expects; a copy is kept on the return
\ stack for the error message. The check compares n+1 with `depth`, taken
\ while the incremented argument is still on the stack.
\ On mismatch a PANIC message and the stack contents are printed and the
\ program ends via `255 die`.
: check-depth \ ... n -- ...
dup>r
n:1+ depth n:=
!if
con:red con:onBlack
"PANIC: expected stack depth to be " . r> . cr
"Stack is:" . cr
.s cr
255 die
then
\ depth was as expected: discard the saved copy of n
rdrop
;
\ -----------------------------------------------------------------
\ status report at end of run
\ Registered with onexit: if the tallies do not add up to the planned count
\ (all-tests-run? is false), print a red "not all tests completed" line.
( all-tests-run?
!if con:red con:onBlack "... FAIL - not all tests completed" . con:white con:onBlack cr then
) onexit
\ Print a summary of the tests run
\ Registered with onexit: prints the planned, passed, skipped and failed
\ counts on one line in white on black.
\ NOTE(review): the order in which the onexit handlers run is not visible here.
( con:white con:onBlack
test-count @ . space "tests planned - " .
tests-passed @ . space "passed - " .
tests-skipped @ . space "skipped - " .
tests-failed @ . space "failed" . cr
) onexit
\ -----------------------------------------------------------------
\ The public-facing words
\ -----------------------------------------------------------------
\ Assert that executing word w leaves a value equal (per eq?) to x.
\ s is the test name. The expected value and the word may be given in either
\ order; isword? detects which one is on top and they are swapped if needed.
\ When run-test? is false the test is only recorded as skipped.
: equal? \ s x w -- | s w x --
run-test? !if 2drop test-skipped ;; then
isword? !if swap then
w:exec
\ the stack should now hold the test name, the expected and the actual value
3 check-depth
2dup \ so test-failed can show actual and expected
eq? if test-passed else test-failed then
;
\ Assert that executing word w leaves a number approximately equal to x.
\ s is the test name and n is the tolerance passed on to eps_eq?. As with
\ equal?, the expected value and the word may be given in either order.
\ When run-test? is false the test is only recorded as skipped.
: approx_equal? \ s x w n -- | s w x n --
run-test? !if 3drop test-skipped ;; then
\ tuck the tolerance under the value/word pair before looking for the word
-rot isword? !if swap then
w:exec
4 check-depth
3dup \ so test-failed can show actual and expected
eps_eq?
\ in both branches the tolerance is rotated to the top and dropped first
if rot drop test-passed else rot drop test-failed then
;
\ Assert that executing word w leaves a true value.
\ s is the test name. When run-test? is false the test is only recorded as
\ skipped.
: true? \ s w --
run-test? !if drop test-skipped ;; then
w:exec
2 check-depth
true swap dup \ so test-failed can show actual and expected
if test-passed else test-failed then
;
\ Assert that executing word w leaves a false value.
\ s is the test name. When run-test? is false the test is only recorded as
\ skipped.
: false? \ s w --
run-test? !if drop test-skipped ;; then
w:exec
2 check-depth
false swap dup \ so test-failed can show actual and expected
!if test-passed else test-failed then
;
\ Assert that executing word w leaves null.
\ s is the test name. This definition lives in the test namespace; the
\ built-in check is reached explicitly as G:null? below.
\ When run-test? is false the test is only recorded as skipped.
: null? \ s w --
run-test? !if drop test-skipped ;; then
w:exec
2 check-depth
null swap dup \ so test-failed can show actual and expected
\ nip removes the extra copy of the actual value from under the flag
G:null? nip if test-passed else test-failed then
;
\ Clear run-test so that later assertions are skipped unless run-test?
\ finds the RUN_ALL_TESTS environment variable set.
: SKIP-REST-OF-TESTS false run-test ! ;
\ Declare how many tests the script plans to run by storing n in test-count.
\ all-tests-run? later compares the tallies against this number.
: tests \ n --
test-count !
;
\ Finish the run and hand an exit status to `die`:
\ 0 = every planned test ran and none failed
\ 1 = the tallies do not match the planned count (some error occurred)
\ 2 = every planned test ran but at least one failed
: end-of-tests \ --
all-tests-run?
!if
1
else
tests-failed @ 0 n:= if 0 else 2 then
then
die
;

View File

@@ -0,0 +1,3 @@
\ Exercise stub: the body is intentionally empty and is to be filled in.
\ Per the stack comment the word takes an RNA string and leaves an array.
: >proteins \ s -- a
;

View File

@@ -0,0 +1,152 @@
\ Test script for the protein-translation exercise.
\ Loads the solution, pulls in the test harness and declares 29 planned tests.
\ Each test pushes a name, a quotation that calls >proteins, and (for equal?)
\ the expected array; null? expects the quotation to leave null.
"protein-translation.8th" f:include
needs exercism/test
with: test
29 tests
"Empty RNA sequence results in no proteins"
( "" >proteins )
[]
equal?
\ Everything below is skipped unless RUN_ALL_TESTS is set in the environment.
SKIP-REST-OF-TESTS
\ Single-codon sequences
"Methionine RNA sequence"
( "AUG" >proteins )
["Methionine"]
equal?
"Phenylalanine RNA sequence 1"
( "UUU" >proteins )
["Phenylalanine"]
equal?
"Phenylalanine RNA sequence 2"
( "UUC" >proteins )
["Phenylalanine"]
equal?
"Leucine RNA sequence 1"
( "UUA" >proteins )
["Leucine"]
equal?
"Leucine RNA sequence 2"
( "UUG" >proteins )
["Leucine"]
equal?
"Serine RNA sequence 1"
( "UCU" >proteins )
["Serine"]
equal?
"Serine RNA sequence 2"
( "UCC" >proteins )
["Serine"]
equal?
"Serine RNA sequence 3"
( "UCA" >proteins )
["Serine"]
equal?
"Serine RNA sequence 4"
( "UCG" >proteins )
["Serine"]
equal?
"Tyrosine RNA sequence 1"
( "UAU" >proteins )
["Tyrosine"]
equal?
"Tyrosine RNA sequence 2"
( "UAC" >proteins )
["Tyrosine"]
equal?
"Cysteine RNA sequence 1"
( "UGU" >proteins )
["Cysteine"]
equal?
"Cysteine RNA sequence 2"
( "UGC" >proteins )
["Cysteine"]
equal?
"Tryptophan RNA sequence"
( "UGG" >proteins )
["Tryptophan"]
equal?
\ A lone STOP codon yields an empty protein list
"STOP codon RNA sequence 1"
( "UAA" >proteins )
[]
equal?
"STOP codon RNA sequence 2"
( "UAG" >proteins )
[]
equal?
"STOP codon RNA sequence 3"
( "UGA" >proteins )
[]
equal?
\ Multi-codon sequences
"Sequence of two protein codons translates into proteins"
( "UUUUUU" >proteins )
["Phenylalanine", "Phenylalanine"]
equal?
"Sequence of two different protein codons translates into proteins"
( "UUAUUG" >proteins )
["Leucine", "Leucine"]
equal?
"Translate RNA strand into correct protein list"
( "AUGUUUUGG" >proteins )
["Methionine", "Phenylalanine", "Tryptophan"]
equal?
\ Translation must end at the first STOP codon
"Translation stops if STOP codon at beginning of sequence"
( "UAGUGG" >proteins )
[]
equal?
"Translation stops if STOP codon at end of two-codon sequence"
( "UGGUAG" >proteins )
["Tryptophan"]
equal?
"Translation stops if STOP codon at end of three-codon sequence"
( "AUGUUUUAA" >proteins )
["Methionine", "Phenylalanine"]
equal?
"Translation stops if STOP codon in middle of three-codon sequence"
( "UGGUAGUGG" >proteins )
["Tryptophan"]
equal?
"Translation stops if STOP codon in middle of six-codon sequence"
( "UGGUGUUAUUAAUGGUUU" >proteins )
["Tryptophan", "Cysteine", "Tyrosine"]
equal?
\ Invalid or incomplete input: null is expected unless a STOP codon comes first
"Unknown amino acids, not part of a codon, can't translate"
( "XYZ" >proteins )
null?
"Incomplete RNA sequence can't translate"
( "AUGU" >proteins )
null?
"Incomplete RNA sequence can translate if valid until a STOP codon"
( "UUCUUCUAAUGGU" >proteins )
["Phenylalanine", "Phenylalanine"]
equal?
\ Sets the process exit status from the tallies
end-of-tests
;with