Stemming

danbihan · updated November 14, 2021
# Sample inputs for the stemming demo: the sentence to process and the
# roots that its words may be reduced to.
sentence = "the unkempt cattle was rattled by the battery"
roots = [
    "cat",
    "bat",
    "rat",
    "kempt",
]

def replace_words(roots, sentence):
    """Replace each word of *sentence* with the root it starts with.

    A word is stemmed only when a root is a prefix of it; a root that
    appears in the middle or at the end of a word (e.g. "kempt" in
    "unkempt") leaves the word untouched. When several roots are prefixes
    of the same word, the shortest one is used, so the result does not
    depend on the order of *roots*.

    Args:
        roots: Iterable of root strings. Empty strings are ignored.
        sentence: Text whose words are separated by single spaces.

    Returns:
        The sentence with every stemmable word replaced by its root.
        Spacing is preserved exactly.
    """
    # An empty root is a prefix of every word and would erase it.
    candidates = [root for root in roots if root]

    def stem(word):
        # Fall back to the word itself when no root is a prefix of it.
        return min(
            (root for root in candidates if word.startswith(root)),
            key=len,
            default=word,
        )

    # Stem word by word rather than with str.replace on the whole
    # sentence: a substring replace would also rewrite other words that
    # merely contain a stemmed word (e.g. "kempted" inside "unkempted").
    return ' '.join(stem(word) for word in sentence.split(' '))

# Demo: stem the sample sentence and print the result.
# Expected output: "the unkempt cat was rat by the bat"
print(replace_words(roots, sentence))
Output

Comments

Please sign up or log in to contribute to the discussion.