import hashlib

def simhash(x: str, *, shingle_size: int = 3) -> str:
    """Return a 128-bit SimHash fingerprint of *x* as a hex string.

    The text is cut into overlapping character shingles of ``shingle_size``
    characters.  Each shingle is hashed with MD5, and every one of the 128
    digest bits casts a vote for its position: +1 when the bit is set, -1
    when it is clear.  A fingerprint bit is 1 when the vote total for that
    position is non-negative.

    Args:
        x: Text to fingerprint.
        shingle_size: Number of characters per shingle (default 3).

    Returns:
        The fingerprint as exactly 32 lowercase hexadecimal digits.

    Raises:
        ValueError: If ``shingle_size`` is smaller than 1.

    Note:
        When *x* is shorter than ``shingle_size`` there are no shingles, all
        vote totals stay at zero, and the result is ``"f" * 32``.
    """
    if shingle_size < 1:
        raise ValueError("shingle_size must be a positive integer")

    n_bits = 128  # an MD5 digest is 16 bytes wide
    votes = [0] * n_bits

    for start in range(len(x) - shingle_size + 1):
        shingle = x[start : start + shingle_size]
        # Big-endian conversion yields the same integer as parsing hexdigest().
        digest = int.from_bytes(hashlib.md5(shingle.encode()).digest(), "big")
        for bit in range(n_bits):
            votes[bit] += 1 if (digest >> bit) & 1 else -1

    fingerprint = 0
    for bit, total in enumerate(votes):
        # Ties (total == 0) count as a set bit.
        if total >= 0:
            fingerprint |= 1 << bit

    return f"{fingerprint:032x}"

# Demo inputs: two sentences that differ only in their first word.
sentence1 = "a quick brown fox jumps over the lazy dog"
sentence2 = "the quick brown fox jumps over the lazy dog"

# Emit one fingerprint per line, first sentence first.
print(simhash(sentence1), simhash(sentence2), sep="\n")

# Embed on website
#
# To embed this program on your website, copy the following code and paste it into your website's HTML: