Version: 3.x

rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer

CountVectorsFeaturizer Objects

@DefaultV1Recipe.register(
    DefaultV1Recipe.ComponentType.MESSAGE_FEATURIZER, is_trainable=True
)
class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent)

Creates a sequence of token-count features based on sklearn's CountVectorizer.

All tokens which consist only of digits (e.g. 123 and 99 but not ab12d) will be represented by a single feature.

Set analyzer to 'char_wb' to use the idea of Subword Semantic Hashing from https://arxiv.org/abs/1810.07150.

required_components

@classmethod
def required_components(cls) -> List[Type]

Components that should be included in the pipeline before this component.

get_default_config

@staticmethod
def get_default_config() -> Dict[Text, Any]

Returns the component's default config.

required_packages

@staticmethod
def required_packages() -> List[Text]

Any extra Python dependencies required for this component to run.

__init__

def __init__(config: Dict[Text, Any],
             model_storage: ModelStorage,
             resource: Resource,
             execution_context: ExecutionContext,
             vectorizers: Optional[Dict[Text, "CountVectorizer"]] = None,
             oov_token: Optional[Text] = None,
             oov_words: Optional[List[Text]] = None) -> None

Constructs a new count vectorizer using the sklearn framework.

create

@classmethod
def create(cls, config: Dict[Text, Any], model_storage: ModelStorage,
           resource: Resource,
           execution_context: ExecutionContext) -> CountVectorsFeaturizer

Creates a new untrained component (see parent class for full docstring).

train

def train(training_data: TrainingData,
          model: Optional[SpacyModel] = None) -> Resource

Trains the featurizer.

Takes parameters from the config and constructs a new count vectorizer using the sklearn framework.

process_training_data

def process_training_data(training_data: TrainingData) -> TrainingData

Processes the training examples in the given training data in-place.

Arguments:

training_data - the training data

Returns:

same training data after processing

process

def process(messages: List[Message]) -> List[Message]

Processes incoming messages, computing and setting their features.

persist

def persist() -> None

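Persists this model into the passed directory.

Returns the metadata necessary to load the model again. (Note: the signature shown above takes no directory argument and is annotated `-> None`, so this description may be outdated.)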

load

@classmethod
def load(cls, config: Dict[Text, Any], model_storage: ModelStorage,
         resource: Resource, execution_context: ExecutionContext,
         **kwargs: Any) -> CountVectorsFeaturizer

Loads trained component (see parent class for full docstring).

validate_config

@classmethod
def validate_config(cls, config: Dict[Text, Any]) -> None

Validates that the component is configured properly.

CountVectorsFeaturizer Objects#

required_components#

get_default_config#

required_packages#

__init__#

create#

train#

process_training_data#

process#

persist#

load#

validate_config#