几种tokenize的方法
1.nltk
import nltk
# Sample text with abbreviations ("Ave."), hyphenated words and an
# apostrophe form ("1960's-era") to show how the tokenizer treats them.
sentence = "No left turn sign on a street sign at 3rd Ave. A sky view of London, England- the palace and parliament. a retro kitchen with brown wood-paneled designs and other 1960's-era objects."
# word_tokenize returns a list of token strings.
# NOTE(review): it relies on NLTK's Punkt tokenizer data being downloaded
# beforehand (e.g. via nltk.download) — confirm for the installed version.
tokens = nltk.word_tokenize(sentence)
# Join the tokens directly; no intermediate copy of the list is needed.
' | '.join(tokens)
"No | left | turn | sign | on | a | street | sign | at | 3rd | Ave. | A | sky | view | of | London | , | England- | the | palace | and | parliament | . | a | retro | kitchen | with | brown | wood-paneled | designs | and | other | 1960's-era | objects | ."
2.spacy
from spacy.lang.en import English
# Instantiate the English language class directly; no trained pipeline is
# loaded through spacy.load, so calling it only tokenizes the text.
nlp = English()
# Sample text with abbreviations ("Ave."), hyphenated words and an
# apostrophe form ("1960's-era") to show how the tokenizer treats them.
sentence = "No left turn sign on a street sign at 3rd Ave. A sky view of London, England- the palace and parliament. a retro kitchen with brown wood-paneled designs and other 1960's-era objects."
# Iterating over the returned document yields one token object at a time.
tokens = nlp(sentence)
# Collect the surface text of every token.
token_list = [token.text for token in tokens]
' | '.join(token_list)
"No | left | turn | sign | on | a | street | sign | at | 3rd | Ave | . | A | sky | view | of | London | , | England- | the | palace | and | parliament | . | a | retro | kitchen | with | brown | wood | - | paneled | designs | and | other | 1960's | - | era | objects | ."
3.stanfordnlp
import stanfordnlp
# Build an English pipeline that runs only the tokenize processor.
# NOTE(review): the stanfordnlp package has been superseded by `stanza`, and
# the English models must be downloaded before use — confirm the setup.
nlp = stanfordnlp.Pipeline(processors='tokenize', lang='en')
# Sample text with abbreviations ("Ave."), hyphenated words and an
# apostrophe form ("1960's-era") to show how the tokenizer treats them.
sentence = "No left turn sign on a street sign at 3rd Ave. A sky view of London, England- the palace and parliament. a retro kitchen with brown wood-paneled designs and other 1960's-era objects."
doc = nlp(sentence)
# The document is split into sentences; build one ' | '-joined string per
# sentence. A distinct loop name (`sent`) keeps the input string `sentence`
# from being overwritten.
token_list = [
    ' | '.join(token.text for token in sent.tokens)
    for sent in doc.sentences
]
token_list
['No | left | turn | sign | on | a | street | sign | at | 3rd',
'Ave | .',
'A | sky | view | of | London | , | England | - | the | palace | and | parliament | .',
"a | retro | kitchen | with | brown | wood | - | paneled | designs | and | other | 1960 | 's | - | era | objects | ."]
最后编辑于:2022 年 12 月 26 日 18:13