KGF 2 стал вторым по кассовым сборам фильмом

“KGF 2 стал вторым по кассовым сборам фильмом”.

Метод 1: использование библиотек обработки естественного языка

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
def extract_keywords(text, num_keywords):
    """Return the most frequent stemmed, non-stopword tokens of *text*.

    The text is tokenized with NLTK's ``word_tokenize``; English stopwords
    and punctuation-only tokens are discarded; the remaining tokens are
    reduced to their Porter stems and ranked by frequency.

    Args:
        text: English text to analyze.
        num_keywords: Maximum number of keywords to return.

    Returns:
        A list of at most ``num_keywords`` stems, most frequent first.
        The entries are stems produced by ``PorterStemmer``, not the
        original surface words.
    """
    stop_words = set(stopwords.words('english'))

    # word_tokenize emits punctuation marks as separate tokens. Requiring at
    # least one alphanumeric character keeps "." or "," from being ranked as
    # keywords while still accepting tokens such as "highest-grossing" or "2".
    content_tokens = [
        token
        for token in word_tokenize(text)
        if token.casefold() not in stop_words
        and any(char.isalnum() for char in token)
    ]

    # Count stems rather than raw tokens so inflected forms are merged.
    stemmer = PorterStemmer()
    stem_counts = nltk.FreqDist([stemmer.stem(token) for token in content_tokens])

    return [stem for stem, _count in stem_counts.most_common(num_keywords)]
# Demo: run extract_keywords (defined above) on one sample English sentence.
text = "KGF 2 became the second highest-grossing film."
num_keywords = 5  # upper bound on how many keywords to request
keywords = extract_keywords(text=text, num_keywords=num_keywords)

# Print the extracted keywords to stdout.
print("Keywords:", keywords)

Метод 2: использование TF-IDF

from sklearn.feature_extraction.text import TfidfVectorizer
def extract_keywords(text, num_keywords):
    """Return up to ``num_keywords`` vocabulary terms found in *text*.

    A ``TfidfVectorizer`` limited to ``num_keywords`` features is fitted on
    a one-document corpus consisting of *text*, and the feature names it
    learned are returned.

    Args:
        text: Text to analyze.
        num_keywords: Passed to the vectorizer as ``max_features``.

    Returns:
        A list with the vectorizer's feature names.
    """
    vectorizer = TfidfVectorizer(max_features=num_keywords)

    # Only the learned vocabulary is needed, so fitting is sufficient; the
    # TF-IDF matrix itself was never used.
    vectorizer.fit([text])

    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer the replacement and fall back only on old releases.
    try:
        feature_names = vectorizer.get_feature_names_out()
    except AttributeError:
        feature_names = vectorizer.get_feature_names()

    # get_feature_names_out() returns an array; callers of this function
    # have always received a plain list.
    return list(feature_names)
# Demo: feed a sample sentence to the extract_keywords function defined above.
text = "KGF 2 became the second highest-grossing film."
num_keywords = 5  # maximum number of keywords to ask for
keywords = extract_keywords(text=text, num_keywords=num_keywords)

# Report the result on stdout.
print("Keywords:", keywords)

Метод 3: использование RAKE (Rapid Automatic Keyword Extraction — быстрое автоматическое извлечение ключевых слов)

from rake_nltk import Rake
def extract_keywords(text, num_keywords):
    """Return the first ``num_keywords`` RAKE-ranked phrases of *text*.

    Args:
        text: Text to analyze.
        num_keywords: Maximum number of phrases to return.

    Returns:
        A list holding the leading ``num_keywords`` entries of
        ``Rake.get_ranked_phrases()``, in the order that method returns them.
    """
    # A Rake instance with default settings does the phrase extraction.
    rake = Rake()
    rake.extract_keywords_from_text(text)

    # Truncate the ranked phrase list to the requested size.
    ranked_phrases = rake.get_ranked_phrases()
    return ranked_phrases[:num_keywords]
# Demo: apply the extract_keywords function defined above to a sample sentence.
text = "KGF 2 became the second highest-grossing film."
num_keywords = 5  # cap on the number of returned phrases
keywords = extract_keywords(text=text, num_keywords=num_keywords)

# Write the outcome to stdout.
print("Keywords:", keywords)