This is documentation for Rasa Documentation v2.x, which is no longer actively maintained. For up-to-date documentation, see the latest version (3.x).

rasa.nlu.test

CVEvaluationResult Objects

class CVEvaluationResult(NamedTuple)

Stores NLU cross-validation results.

log_evaluation_table

log_evaluation_table(report: Text, precision: float, f1: float, accuracy: float) -> None

Log the sklearn evaluation metrics.

remove_empty_intent_examples

remove_empty_intent_examples(intent_results: List[IntentEvaluationResult]) -> List[IntentEvaluationResult]

Remove those examples without an intent.

Arguments:

  • intent_results - intent evaluation results
  • Returns - intent evaluation results

remove_empty_response_examples

remove_empty_response_examples(response_results: List[ResponseSelectionEvaluationResult]) -> List[ResponseSelectionEvaluationResult]

Remove those examples without a response.

Arguments:

  • response_results - response selection evaluation results
  • Returns - response selection evaluation results

drop_intents_below_freq

drop_intents_below_freq(training_data: TrainingData, cutoff: int = 5) -> TrainingData

Removes intent groups with fewer than cutoff instances.

Arguments:

  • training_data - training data
  • cutoff - threshold
  • Returns - updated training data
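
A minimal usage sketch, assuming Rasa 2.x and its shared training-data loader; the file path and cutoff value are placeholders.

from rasa.shared.nlu.training_data.loading import load_data
from rasa.nlu.test import drop_intents_below_freq

# Load NLU training data and keep only intents with at least 10 examples.
training_data = load_data("data/nlu.yml")
filtered_data = drop_intents_below_freq(training_data, cutoff=10)
print(len(filtered_data.intent_examples))  # examples remaining after filtering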

write_intent_successes

write_intent_successes(intent_results: List[IntentEvaluationResult], successes_filename: Text) -> None

Write successful intent predictions to a file.

Arguments:

  • intent_results - intent evaluation result
  • successes_filename - filename of file to save successful predictions to

write_response_successes

write_response_successes(response_results: List[ResponseSelectionEvaluationResult], successes_filename: Text) -> None

Write successful response selection predictions to a file.

Arguments:

  • response_results - response selection evaluation result
  • successes_filename - filename of file to save successful predictions to

plot_attribute_confidences

plot_attribute_confidences(results: Union[List[IntentEvaluationResult], List[ResponseSelectionEvaluationResult]], hist_filename: Optional[Text], target_key: Text, prediction_key: Text, title: Text) -> None

Create histogram of confidence distribution.

Arguments:

  • results - evaluation results
  • hist_filename - filename to save plot to
  • target_key - key of target in results
  • prediction_key - key of predictions in results
  • title - title of plot

plot_entity_confidences

plot_entity_confidences(merged_targets: List[Text], merged_predictions: List[Text], merged_confidences: List[float], hist_filename: Text, title: Text) -> None

Creates histogram of confidence distribution.

Arguments:

  • merged_targets - Entity labels.
  • merged_predictions - Predicted entities.
  • merged_confidences - Confidence scores of predictions.
  • hist_filename - filename to save plot to
  • title - title of plot

evaluate_response_selections

evaluate_response_selections(response_selection_results: List[ResponseSelectionEvaluationResult], output_directory: Optional[Text], successes: bool, errors: bool, disable_plotting: bool, report_as_dict: Optional[bool] = None) -> Dict

Creates summary statistics for response selection.

Only considers examples with a set response; others are filtered out. Returns a dictionary containing the evaluation results.

Arguments:

  • response_selection_results - response selection evaluation results
  • output_directory - directory to store files to
  • successes - if True, successful predictions are written to disk
  • errors - if True, incorrect predictions are written to disk
  • disable_plotting - if True, no plots are created
  • report_as_dict - True if the evaluation report should be returned as a dict. If False, the report is returned in a human-readable text format. If None, report_as_dict is considered to be True if an output_directory is given.
  • Returns - dictionary with evaluation results

evaluate_intents

evaluate_intents(intent_results: List[IntentEvaluationResult], output_directory: Optional[Text], successes: bool, errors: bool, disable_plotting: bool, report_as_dict: Optional[bool] = None) -> Dict

Creates summary statistics for intents.

Only considers examples with a set intent; others are filtered out. Returns a dictionary containing the evaluation results.

Arguments:

  • intent_results - intent evaluation results
  • output_directory - directory to store files to
  • successes - if True, correct predictions are written to disk
  • errors - if True, incorrect predictions are written to disk
  • disable_plotting - if True, no plots are created
  • report_as_dict - True if the evaluation report should be returned as a dict. If False, the report is returned in a human-readable text format. If None, report_as_dict is considered to be True if an output_directory is given.
  • Returns - dictionary with evaluation results
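
A hedged sketch of a typical call, assuming `intent_results` was produced by get_eval_data (documented later in this module); the output directory and flag values are placeholders.

from rasa.nlu.test import evaluate_intents

# `intent_results` is assumed to come from get_eval_data(interpreter, test_data).
intent_report = evaluate_intents(
    intent_results,
    output_directory="results",
    successes=False,
    errors=True,
    disable_plotting=True,
    report_as_dict=True,
)
print(intent_report.keys())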

merge_labels

merge_labels(aligned_predictions: List[Dict], extractor: Optional[Text] = None) -> List[Text]

Concatenates all labels of the aligned predictions.

Takes the aligned prediction labels which are grouped for each message and concatenates them.

Arguments:

  • aligned_predictions - aligned predictions
  • extractor - entity extractor name
  • Returns - concatenated predictions

merge_confidences

merge_confidences(aligned_predictions: List[Dict], extractor: Optional[Text] = None) -> List[float]

Concatenates all confidences of the aligned predictions.

Takes the aligned prediction confidences which are grouped for each message and concatenates them.

Arguments:

  • aligned_predictions - aligned predictions
  • extractor - entity extractor name
  • Returns - concatenated confidences

substitute_labels

substitute_labels(labels: List[Text], old: Text, new: Text) -> List[Text]

Replaces label names in a list of labels.

Arguments:

  • labels - list of labels
  • old - old label name that should be replaced
  • new - new label name
  • Returns - updated labels
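
A small, self-contained example; the label names used here are illustrative, not mandated by the API.

from rasa.nlu.test import substitute_labels

labels = ["O", "city", "no_entity", "city"]
# Replace one label name with another throughout the list.
print(substitute_labels(labels, "no_entity", "O"))  # ['O', 'city', 'O', 'city']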

collect_incorrect_entity_predictions

collect_incorrect_entity_predictions(entity_results: List[EntityEvaluationResult], merged_predictions: List[Text], merged_targets: List[Text]) -> List["EntityPrediction"]

Get incorrect entity predictions.

Arguments:

  • entity_results - entity evaluation results
  • merged_predictions - list of predicted entity labels
  • merged_targets - list of true entity labels
  • Returns - list of incorrect predictions

write_successful_entity_predictions

write_successful_entity_predictions(entity_results: List[EntityEvaluationResult], merged_targets: List[Text], merged_predictions: List[Text], successes_filename: Text) -> None

Write correct entity predictions to a file.

Arguments:

  • entity_results - entity evaluation results
  • merged_predictions - list of predicted entity labels
  • merged_targets - list of true entity labels
  • successes_filename - filename of file to save correct predictions to

collect_successful_entity_predictions

collect_successful_entity_predictions(entity_results: List[EntityEvaluationResult], merged_predictions: List[Text], merged_targets: List[Text]) -> List["EntityPrediction"]

Get correct entity predictions.

Arguments:

  • entity_results - entity evaluation results
  • merged_predictions - list of predicted entity labels
  • merged_targets - list of true entity labels
  • Returns - list of correct predictions

evaluate_entities

evaluate_entities(entity_results: List[EntityEvaluationResult], extractors: Set[Text], output_directory: Optional[Text], successes: bool, errors: bool, disable_plotting: bool, report_as_dict: Optional[bool] = None) -> Dict

Creates summary statistics for each entity extractor.

Logs precision, recall, and F1 per entity type for each extractor.

Arguments:

  • entity_results - entity evaluation results
  • extractors - entity extractors to consider
  • output_directory - directory to store files to
  • successes - if True, correct predictions are written to disk
  • errors - if True, incorrect predictions are written to disk
  • disable_plotting - if True, no plots are created
  • report_as_dict - True if the evaluation report should be returned as a dict. If False, the report is returned in a human-readable text format. If None, report_as_dict is considered to be True if an output_directory is given.
  • Returns - dictionary with evaluation results
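
A hedged sketch, assuming `interpreter` is a loaded Rasa 2.x Interpreter and `entity_results` comes from get_eval_data; get_entity_extractors is documented later in this module.

from rasa.nlu.test import evaluate_entities, get_entity_extractors

# `interpreter` and `entity_results` are assumed to exist already.
extractors = get_entity_extractors(interpreter)
entity_report = evaluate_entities(
    entity_results,
    extractors,
    output_directory="results",
    successes=False,
    errors=True,
    disable_plotting=True,
    report_as_dict=True,
)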

is_token_within_entity

is_token_within_entity(token: Token, entity: Dict) -> bool

Checks if a token is within the boundaries of an entity.

does_token_cross_borders

does_token_cross_borders(token: Token, entity: Dict) -> bool

Checks if a token crosses the boundaries of an entity.

determine_intersection

determine_intersection(token: Token, entity: Dict) -> int

Calculates how many characters a given token and entity share.

do_entities_overlap

do_entities_overlap(entities: List[Dict]) -> bool

Checks if entities overlap.

I.e., they cross each other's start and end boundaries.

Arguments:

  • entities - list of entities
  • Returns - true if entities overlap, false otherwise.

find_intersecting_entities

find_intersecting_entities(token: Token, entities: List[Dict]) -> List[Dict]

Finds the entities that intersect with a token.

Arguments:

  • token - a single token
  • entities - entities found by a single extractor
  • Returns - list of entities

pick_best_entity_fit

pick_best_entity_fit(token: Token, candidates: List[Dict[Text, Any]]) -> Optional[Dict[Text, Any]]

Determines the best fitting entity given intersecting entities.

Arguments:

  • token - a single token
  • candidates - entities found by a single extractor

Returns:

the entity that fits the token best, or None if no candidate fits

determine_token_labels

determine_token_labels(token: Token, entities: List[Dict], extractors: Optional[Set[Text]] = None, attribute_key: Text = ENTITY_ATTRIBUTE_TYPE) -> Text

Determines the token label for the provided attribute key given entities that do not overlap.

Arguments:

  • token - a single token
  • entities - entities found by a single extractor
  • extractors - list of extractors
  • attribute_key - the attribute key for which the entity type should be returned

Returns:

entity type

determine_entity_for_token

determine_entity_for_token(token: Token, entities: List[Dict[Text, Any]], extractors: Optional[Set[Text]] = None) -> Optional[Dict[Text, Any]]

Determines the best fitting entity for the given token, given entities that do not overlap.

Arguments:

  • token - a single token
  • entities - entities found by a single extractor
  • extractors - list of extractors

Returns:

the entity that fits the token best, or None if no entity fits

do_any_extractors_not_support_overlap

do_any_extractors_not_support_overlap(extractors: Optional[Set[Text]]) -> bool

Checks if any extractor does not support overlapping entities.

Arguments:

  • extractors - names of the entity extractors

Returns:

True if and only if CRFEntityExtractor or DIETClassifier is in extractors

align_entity_predictions

align_entity_predictions(result: EntityEvaluationResult, extractors: Set[Text]) -> Dict

Aligns entity predictions to the message tokens.

Determines for every token the true label based on the prediction targets and the label assigned by each single extractor.

Arguments:

  • result - entity evaluation result
  • extractors - the entity extractors that should be considered
  • Returns - dictionary containing the true token labels and token labels from the extractors

align_all_entity_predictions

align_all_entity_predictions(entity_results: List[EntityEvaluationResult], extractors: Set[Text]) -> List[Dict]

Aligns entity predictions to the message tokens for the whole dataset using align_entity_predictions.

Arguments:

  • entity_results - list of entity prediction results
  • extractors - the entity extractors that should be considered
  • Returns - list of dictionaries containing the true token labels and token labels from the extractors

get_eval_data

get_eval_data(interpreter: Interpreter, test_data: TrainingData) -> Tuple[List[IntentEvaluationResult], List[ResponseSelectionEvaluationResult], List[EntityEvaluationResult]]

Runs the model for the test set and extracts targets and predictions.

Returns intent results (intent targets and predictions, the original messages and the confidences of the predictions), response results (response targets and predictions) as well as entity results (entity_targets, entity_predictions, and tokens).

Arguments:

  • interpreter - the interpreter
  • test_data - test data
  • Returns - intent, response, and entity evaluation results
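
A usage sketch, assuming a Rasa 2.x Interpreter loaded from an unpacked NLU model directory and test data loaded with the shared training-data loader; both paths are placeholders.

from rasa.nlu.model import Interpreter
from rasa.shared.nlu.training_data.loading import load_data
from rasa.nlu.test import get_eval_data

interpreter = Interpreter.load("models/nlu")  # assumed: unpacked NLU model directory
test_data = load_data("data/test_data.yml")

intent_results, response_results, entity_results = get_eval_data(interpreter, test_data)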

get_entity_extractors

get_entity_extractors(interpreter: Interpreter) -> Set[Text]

Finds the names of entity extractors used by the interpreter.

Processors are removed since they do not detect the boundaries themselves.

Arguments:

  • interpreter - the interpreter
  • Returns - entity extractor names

is_entity_extractor_present

is_entity_extractor_present(interpreter: Interpreter) -> bool

Checks whether an entity extractor is present.

is_intent_classifier_present

is_intent_classifier_present(interpreter: Interpreter) -> bool

Checks whether an intent classifier is present.

is_response_selector_present

is_response_selector_present(interpreter: Interpreter) -> bool

Checks whether a response selector is present.

get_available_response_selector_types

get_available_response_selector_types(interpreter: Interpreter) -> List[Text]

Gets all available response selector types.

remove_pretrained_extractors

remove_pretrained_extractors(pipeline: List[Component]) -> List[Component]

Remove pre-trained extractors from the pipeline.

Remove pre-trained extractors so that entities from pre-trained extractors are not predicted upon parsing.

Arguments:

  • pipeline - the pipeline

Returns:

Updated pipeline
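
A sketch of a typical call, assuming `interpreter` is a loaded rasa.nlu.model.Interpreter whose pipeline attribute holds the component list.

from rasa.nlu.test import remove_pretrained_extractors

# Drop pre-trained extractors so their entities are not predicted during parsing.
interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)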

run_evaluation

async run_evaluation(data_path: Text, model_path: Text, output_directory: Optional[Text] = None, successes: bool = False, errors: bool = False, component_builder: Optional[ComponentBuilder] = None, disable_plotting: bool = False, report_as_dict: Optional[bool] = None) -> Dict

Evaluate intent classification, response selection and entity extraction.

Arguments:

  • data_path - path to the test data
  • model_path - path to the model
  • output_directory - path to folder where all output will be stored
  • successes - if true successful predictions are written to a file
  • errors - if true incorrect predictions are written to a file
  • component_builder - component builder
  • disable_plotting - if true confusion matrix and histogram will not be rendered
  • report_as_dict - True if the evaluation report should be returned as a dict. If False, the report is returned in a human-readable text format. If None, report_as_dict is considered to be True if an output_directory is given.
  • Returns - dictionary containing evaluation results
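
A hedged end-to-end sketch; run_evaluation is a coroutine, so it is driven with asyncio here. The data path is a placeholder and model_path is assumed to point at an unpacked NLU model directory.

import asyncio

from rasa.nlu.test import run_evaluation

results = asyncio.run(
    run_evaluation(
        data_path="data/test_data.yml",
        model_path="models/nlu",  # assumed: unpacked NLU model directory
        output_directory="results",
        errors=True,
        report_as_dict=True,
    )
)
print(sorted(results.keys()))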

generate_folds

generate_folds(n: int, training_data: TrainingData) -> Iterator[Tuple[TrainingData, TrainingData]]

Generates n cross-validation folds for the given training data.
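
A short sketch, assuming `training_data` is an already loaded TrainingData object.

from rasa.nlu.test import generate_folds

# Iterate over 5 stratified train/test splits of the training data.
for train_split, test_split in generate_folds(5, training_data):
    print(len(train_split.training_examples), len(test_split.training_examples))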

combine_result

combine_result(intent_metrics: IntentMetrics, entity_metrics: EntityMetrics, response_selection_metrics: ResponseSelectionMetrics, interpreter: Interpreter, data: TrainingData, intent_results: Optional[List[IntentEvaluationResult]] = None, entity_results: Optional[List[EntityEvaluationResult]] = None, response_selection_results: Optional[List[ResponseSelectionEvaluationResult]] = None) -> Tuple[IntentMetrics, EntityMetrics, ResponseSelectionMetrics]

Collects intent, response selection and entity metrics for cross validation folds.

If intent_results, response_selection_results or entity_results is provided as a list, prediction results are also collected.

Arguments:

  • intent_metrics - intent metrics
  • entity_metrics - entity metrics
  • response_selection_metrics - response selection metrics
  • interpreter - the interpreter
  • data - training data
  • intent_results - intent evaluation results
  • entity_results - entity evaluation results
  • response_selection_results - response selection evaluation results
  • Returns - intent, entity, and response selection metrics

cross_validate

cross_validate(data: TrainingData, n_folds: int, nlu_config: Union[RasaNLUModelConfig, Text, Dict], output: Optional[Text] = None, successes: bool = False, errors: bool = False, disable_plotting: bool = False, report_as_dict: Optional[bool] = None) -> Tuple[CVEvaluationResult, CVEvaluationResult, CVEvaluationResult]

Stratified cross validation on data.

Arguments:

  • data - Training Data
  • n_folds - integer, number of cv folds
  • nlu_config - nlu config file
  • output - path to folder where reports are stored
  • successes - if true successful predictions are written to a file
  • errors - if true incorrect predictions are written to a file
  • disable_plotting - if true, no confusion matrix or histogram plots are created
  • report_as_dict - True if the evaluation report should be returned as a dict. If False, the report is returned in a human-readable text format. If None, report_as_dict is considered to be True if an output directory is given.

Returns:

dictionary with a key-to-list structure, where each entry in a list corresponds to the relevant result for one fold
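
A usage sketch, assuming the shared training-data loader; the NLU data, config, and output paths are placeholders.

from rasa.shared.nlu.training_data.loading import load_data
from rasa.nlu.test import cross_validate

data = load_data("data/nlu.yml")
intent_cv, entity_cv, response_cv = cross_validate(
    data=data,
    n_folds=5,
    nlu_config="config.yml",
    output="results",
    report_as_dict=True,
)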

compute_metrics

compute_metrics(interpreter: Interpreter, training_data: TrainingData) -> Tuple[IntentMetrics, EntityMetrics, ResponseSelectionMetrics, List[IntentEvaluationResult], List[EntityEvaluationResult], List[ResponseSelectionEvaluationResult]]

Computes metrics for intent classification, response selection and entity extraction.

Arguments:

  • interpreter - the interpreter
  • training_data - training data
  • Returns - intent, response selection and entity metrics, and prediction results.
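
A sketch, assuming `interpreter` and `training_data` as in the examples above; the six return values follow the signature.

from rasa.nlu.test import compute_metrics

(
    intent_metrics,
    entity_metrics,
    response_metrics,
    intent_results,
    entity_results,
    response_results,
) = compute_metrics(interpreter, training_data)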

compare_nlu

async compare_nlu(configs: List[Text], data: TrainingData, exclusion_percentages: List[int], f_score_results: Dict[Text, Any], model_names: List[Text], output: Text, runs: int) -> List[int]

Trains and compares multiple NLU models. For each run and exclusion percentage, one model per config file is trained on the corresponding portion of the training data and then tested on the complete test data of that run. All results are stored in the provided output directory.

Arguments:

  • configs - config files needed for training
  • data - training data
  • exclusion_percentages - percentages of training data to exclude during comparison
  • f_score_results - dictionary of model name to f-score results per run
  • model_names - names of the models to train
  • output - the output directory
  • runs - number of comparison runs
  • Returns - training examples per run

log_results

log_results(results: IntentMetrics, dataset_name: Text) -> None

Logs results of cross validation.

Arguments:

  • results - dictionary of results returned from cross validation
  • dataset_name - string of which dataset the results are from, e.g. test/train

log_entity_results

log_entity_results(results: EntityMetrics, dataset_name: Text) -> None

Logs entity results of cross validation.

Arguments:

  • results - dictionary of dictionaries of results returned from cross validation
  • dataset_name - string of which dataset the results are from, e.g. test/train