from bisect import bisect_left, bisect_right
from itertools import chain, pairwise
def _bracket_pairs(text: str) -> tuple[list[int], list[int]]:
    """Locate ``[...]`` pairs in *text*, in bracket-free coordinates.

    Each ``[`` is paired with the first ``]`` that follows it; a ``[`` seen
    while another is still open, or a ``]`` with no open ``[``, is ignored as
    a pair boundary.  Every bracket character is still counted, so the
    returned offsets always line up with *text* after all ``[`` and ``]``
    characters have been removed.

    Returns:
        Two parallel, ascending lists ``(opens, closes)`` such that the
        stripped text between ``opens[i]`` and ``closes[i]`` was enclosed by
        the i-th bracket pair.
    """
    opens: list[int] = []
    closes: list[int] = []
    removed = 0      # bracket characters seen so far (all are stripped)
    pending = None   # stripped offset of the '[' still waiting for its ']'
    for position, char in enumerate(text):
        if char == '[':
            if pending is None:
                pending = position - removed
            removed += 1
        elif char == ']':
            if pending is not None:
                opens.append(pending)
                closes.append(position - removed)
                pending = None
            removed += 1
    return opens, closes


def get_span(smaller_sentence: str, larger_sentence: str) -> str:
    """Return *smaller_sentence* with the brackets of *larger_sentence* restored.

    *larger_sentence* is searched with its ``[`` and ``]`` characters removed.
    The first occurrence of *smaller_sentence* in that stripped text is
    returned, re-annotated with every bracket pair that lies entirely inside
    the matched span.  Pairs that only partially overlap the span are dropped.

    Args:
        smaller_sentence: Bracket-free text to look for.
        larger_sentence: Text that may contain ``[...]`` annotations.

    Returns:
        The matched span with fully contained bracket pairs re-inserted.

    Raises:
        ValueError: If *smaller_sentence* does not occur in the stripped
            *larger_sentence*.

    >>> get_span("quick brown fox jumps over",
    ...          "The [quick brown fox] [jumps over] [the lazy dog].")
    '[quick brown fox] [jumps over]'
    """
    stripped = larger_sentence.translate(str.maketrans('', '', '[]'))
    left = stripped.find(smaller_sentence)
    if left == -1:
        raise ValueError(f"Span not found {smaller_sentence, larger_sentence}")
    right = left + len(smaller_sentence)

    opens, closes = _bracket_pairs(larger_sentence)

    # Pairs with index in [first, last) open at or after `left` and close at
    # or before `right`, i.e. they are fully contained in the matched span.
    # When the span sits inside a single pair, first > last and the slices
    # below are simply empty.
    first = bisect_left(opens, left)
    last = bisect_right(closes, right)

    pieces = []
    cursor = left
    for open_at, close_at in zip(opens[first:last], closes[first:last]):
        pieces += (stripped[cursor:open_at], '[', stripped[open_at:close_at], ']')
        cursor = close_at
    pieces.append(stripped[cursor:right])
    return ''.join(pieces)
# Demo: recover the bracket annotations covering a plain-text span.
# Prints "[quick brown fox] [jumps over]".
print(get_span(
    smaller_sentence="quick brown fox jumps over",
    larger_sentence="The [quick brown fox] [jumps over] [the lazy dog].",
))
To embed this project on your website, copy the following code and paste it into your website's HTML: