Source code for languageflow.model.crf

import pycrfsuite


class CRF:
    """Thin wrapper that trains a python-crfsuite model and tags with it.

    Parameters
    ----------
    params : dict, optional
        Training parameters forwarded to ``pycrfsuite.Trainer.set_params``.
        When omitted, a fresh
        ``{'c1': 0.1, 'c2': 0.01, 'feature.minfreq': 0}`` is used.
    filename : str, optional
        Path the trained model is written to and then loaded from.
        ``'model.tmp'`` is used when this is not given (or is empty).

    Attributes
    ----------
    estimator
        ``None`` until ``fit`` has run, afterwards the opened
        ``pycrfsuite.Tagger``.
    """

    def __init__(self, params=None, filename=None):
        # Build the defaults per instance: a dict literal in the signature
        # is created once and would be shared by every CRF constructed
        # without explicit params, so mutating one would affect all.
        if params is None:
            params = {'c1': 0.1, 'c2': 0.01, 'feature.minfreq': 0}
        self.estimator = None
        self.params = params
        self.filename = filename

    def fit(self, X, y):
        """Train a CRF model on X, y and load it as the estimator.

        Parameters
        ----------
        X : iterable
            Training sequences; each item is handed to
            ``pycrfsuite.Trainer.append`` as ``xseq``.
        y : iterable
            Label sequences, paired positionally with the items of X and
            handed to ``pycrfsuite.Trainer.append`` as ``yseq``.

        Notes
        -----
        X and y are paired with ``zip``, so surplus items in the longer of
        the two are ignored. The model is written to ``self.filename`` (or
        ``'model.tmp'``) and then re-opened with a ``pycrfsuite.Tagger``.
        """
        trainer = pycrfsuite.Trainer(verbose=True)
        for xseq, yseq in zip(X, y):
            trainer.append(xseq, yseq)
        trainer.set_params(self.params)

        # Fall back to a fixed file name when no path was configured.
        filename = self.filename or 'model.tmp'
        trainer.train(filename)

        tagger = pycrfsuite.Tagger()
        tagger.open(filename)
        self.estimator = tagger

    def predict(self, X):
        """Tag one sequence, or a list of sequences, with the fitted model.

        Parameters
        ----------
        X : sequence
            Either a single sequence accepted by ``Tagger.tag`` or a list
            whose items are such sequences. X is treated as a batch when
            its first element is a ``list``.

        Returns
        -------
        list
            For a batch, one tagging result per input sequence; for a
            single sequence, the result of ``Tagger.tag(X)``. Empty input
            yields an empty list.

        Raises
        ------
        AttributeError
            If X is non-empty and ``fit`` has not been called yet
            (``self.estimator`` is still ``None``).
        """
        # Nothing to tag; also avoids the IndexError from X[0] below.
        if len(X) == 0:
            return []
        if isinstance(X[0], list):
            return [self.estimator.tag(x) for x in X]
        return self.estimator.tag(X)