[Clone]



CS|QTM|LING-329: Computational Linguistics (Spring 2025)
python -m pip install --upgrade pip
pip install setuptools
pip install elit_tokenizer
from elit_tokenizer import EnglishTokenizer
if __name__ == '__main__':
    text = 'Emory NLP is a research lab in Atlanta, GA. It was founded by Jinho D. Choi in 2014. Dr. Choi is a professor at Emory University.'
    tokenizer = EnglishTokenizer()
    sentence = tokenizer.decode(text)
    print(sentence.tokens)
    print(sentence.offsets)
['Emory', 'NLP', 'is', 'a', 'research', 'lab', 'in', 'Atlanta', ',', 'GA', '.', 'It', 'was', 'founded', 'by', 'Jinho', 'D.', 'Choi', 'in', '2014', '.', 'Dr.', 'Choi', 'is', 'a', 'professor', 'at', 'Emory', 'University', '.']
[(0, 5), (6, 9), (10, 12), (13, 14), (15, 23), (24, 27), (28, 30), (31, 38), (38, 39), (40, 42), (42, 43), (44, 46), (47, 50), (51, 58), (59, 61), (62, 67), (68, 70), (71, 75), (76, 78), (79, 83), (83, 84), (85, 88), (89, 93), (94, 96), (97, 98), (99, 108), (109, 111), (112, 117), (118, 128), (128, 129)]
.gitignore
src/__init__.py
src/homework/__init__.py
src/homework/getting_started.py
.idea/
.venv/