Only one carriage return (#2155)

* updating DIRECTORY.md
* touch
* fixup! Format Python code with psf/black push
* Update word_frequency_functions.py
* updating DIRECTORY.md
* Update word_frequency_functions.py
* Update lfu_cache.py
* Update sol1.py

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
2020-06-25 19:15:30 +02:00
parent d2fa91b18e
commit 8ab84fd794
7 changed files with 77 additions and 68 deletions
--- a/machine_learning/word_frequency_functions.py
+++ b/machine_learning/word_frequency_functions.py
@@ -40,7 +40,7 @@ from math import log10
 """


-def term_frequency(term : str, document : str) -> int:
+def term_frequency(term: str, document: str) -> int:
    """
    Return the number of times a term occurs within
    a given document.
@@ -58,9 +58,7 @@ def term_frequency(term : str, document : str) -> int:
        str.maketrans("", "", string.punctuation)
    ).replace("\n", "")
    tokenize_document = document_without_punctuation.split(" ")  # word tokenization
-    return len(
-        [word for word in tokenize_document if word.lower() == term.lower()]
-    )
+    return len([word for word in tokenize_document if word.lower() == term.lower()])


 def document_frequency(term: str, corpus: str) -> int:
@@ -77,17 +75,18 @@ is the second document in the corpus.\\nTHIS is \
 the third document in the corpus.")
    (1, 3)
    """
-    corpus_without_punctuation = corpus.translate(
+    corpus_without_punctuation = corpus.lower().translate(
        str.maketrans("", "", string.punctuation)
    )  # strip all punctuation and replace it with ''
-    documents = corpus_without_punctuation.split("\n")
-    lowercase_documents = [document.lower() for document in documents]
-    return len(
-        [document for document in lowercase_documents if term.lower() in document]
-    ), len(documents)
+    docs = corpus_without_punctuation.split("\n")
+    term = term.lower()
+    return (
+        len([doc for doc in docs if term in doc]),
+        len(docs),
+    )


-def inverse_document_frequency(df : int, N: int) -> float:
+def inverse_document_frequency(df: int, N: int) -> float:
    """
    Return an integer denoting the importance
    of a word. This measure of importance is
@@ -116,7 +115,7 @@ def inverse_document_frequency(df : int, N: int) -> float:
    return round(log10(N / df), 3)


-def tf_idf(tf : int, idf: int) -> float:
+def tf_idf(tf: int, idf: int) -> float:
    """
    Combine the term frequency
    and inverse document frequency functions to