您可以使用 sklearn 的 "CountVectorizer" 和 "TfidfTransformer"（或一步完成这两步的 "TfidfVectorizer"）将文本数据转换为向量，然后训练模型：
# TF-IDF vectorizer configured with (1, 2) n-grams and English stop words.
# NOTE(review): `tfidf` is not referenced again in the lines shown here; the
# model below is trained on the CountVectorizer + TfidfTransformer features.
# Confirm whether it is used elsewhere before removing it.
tfidf = TfidfVectorizer(
    sublinear_tf=True,
    min_df=5,
    norm='l2',
    encoding='latin-1',
    ngram_range=(1, 2),
    stop_words='english',
)

# Split the text column and its class labels into train/test parts, with the
# random seed pinned to 0.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['class'],
    random_state=0,
)

# Two-stage featurization: raw term counts first, then TF-IDF re-weighting.
# Both stages are fitted on the training split only.
count_vect = CountVectorizer()
tfidf_transformer = TfidfTransformer()
X_train_counts = count_vect.fit_transform(X_train)
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

# Train a linear SVM classifier on the TF-IDF features of the training split.
svmmodel = LinearSVC()
svmmodel.fit(X_train_tfidf, y_train)