Only one carriage return (#2155)
* updating DIRECTORY.md
* touch
* fixup! Format Python code with psf/black push
* Update word_frequency_functions.py
* updating DIRECTORY.md
* Update word_frequency_functions.py
* Update lfu_cache.py
* Update sol1.py
Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
This commit is contained in:
@@ -40,7 +40,7 @@ from math import log10
|
||||
"""
|
||||
|
||||
|
||||
def term_frequency(term : str, document : str) -> int:
|
||||
def term_frequency(term: str, document: str) -> int:
|
||||
"""
|
||||
Return the number of times a term occurs within
|
||||
a given document.
|
||||
@@ -58,9 +58,7 @@ def term_frequency(term : str, document : str) -> int:
|
||||
str.maketrans("", "", string.punctuation)
|
||||
).replace("\n", "")
|
||||
tokenize_document = document_without_punctuation.split(" ") # word tokenization
|
||||
return len(
|
||||
[word for word in tokenize_document if word.lower() == term.lower()]
|
||||
)
|
||||
return len([word for word in tokenize_document if word.lower() == term.lower()])
|
||||
|
||||
|
||||
def document_frequency(term: str, corpus: str) -> int:
|
||||
@@ -77,17 +75,18 @@ is the second document in the corpus.\\nTHIS is \
|
||||
the third document in the corpus.")
|
||||
(1, 3)
|
||||
"""
|
||||
corpus_without_punctuation = corpus.translate(
|
||||
corpus_without_punctuation = corpus.lower().translate(
|
||||
str.maketrans("", "", string.punctuation)
|
||||
) # strip all punctuation and replace it with ''
|
||||
documents = corpus_without_punctuation.split("\n")
|
||||
lowercase_documents = [document.lower() for document in documents]
|
||||
return len(
|
||||
[document for document in lowercase_documents if term.lower() in document]
|
||||
), len(documents)
|
||||
docs = corpus_without_punctuation.split("\n")
|
||||
term = term.lower()
|
||||
return (
|
||||
len([doc for doc in docs if term in doc]),
|
||||
len(docs),
|
||||
)
|
||||
|
||||
|
||||
def inverse_document_frequency(df : int, N: int) -> float:
|
||||
def inverse_document_frequency(df: int, N: int) -> float:
|
||||
"""
|
||||
Return a float denoting the importance
|
||||
of a word. This measure of importance is
|
||||
@@ -116,7 +115,7 @@ def inverse_document_frequency(df : int, N: int) -> float:
|
||||
return round(log10(N / df), 3)
|
||||
|
||||
|
||||
def tf_idf(tf : int, idf: int) -> float:
|
||||
def tf_idf(tf: int, idf: int) -> float:
|
||||
"""
|
||||
Combine the term frequency
|
||||
and inverse document frequency functions to
|
||||
|
||||
Reference in New Issue
Block a user