<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">(window.webpackJsonp=window.webpackJsonp||[]).push([[174],{1629:function(e,a,n){"use strict";n.d(a,"a",(function(){return p})),n.d(a,"b",(function(){return b}));var t=n(0),r=n.n(t);function o(e,a,n){return a in e?Object.defineProperty(e,a,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[a]=n,e}function i(e,a){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var t=Object.getOwnPropertySymbols(e);a&amp;&amp;(t=t.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),n.push.apply(n,t)}return n}function s(e){for(var a=1;a&lt;arguments.length;a++){var n=null!=arguments[a]?arguments[a]:{};a%2?i(Object(n),!0).forEach((function(a){o(e,a,n[a])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(n)):i(Object(n)).forEach((function(a){Object.defineProperty(e,a,Object.getOwnPropertyDescriptor(n,a))}))}return e}function c(e,a){if(null==e)return{};var n,t,r=function(e,a){if(null==e)return{};var n,t,r={},o=Object.keys(e);for(t=0;t&lt;o.length;t++)n=o[t],a.indexOf(n)&gt;=0||(r[n]=e[n]);return r}(e,a);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(t=0;t&lt;o.length;t++)n=o[t],a.indexOf(n)&gt;=0||Object.prototype.propertyIsEnumerable.call(e,n)&amp;&amp;(r[n]=e[n])}return r}var l=r.a.createContext({}),u=function(e){var a=r.a.useContext(l),n=a;return e&amp;&amp;(n="function"==typeof e?e(a):s(s({},a),e)),n},p=function(e){var a=u(e.components);return r.a.createElement(l.Provider,{value:a},e.children)},d={inlineCode:"code",wrapper:function(e){var a=e.children;return r.a.createElement(r.a.Fragment,{},a)}},g=r.a.forwardRef((function(e,a){var n=e.components,t=e.mdxType,o=e.originalType,i=e.parentName,l=c(e,["components","mdxType","originalType","parentName"]),p=u(n),g=t,b=p["".concat(i,".").concat(g)]||p[g]||d[g]||o;return n?r.a.createElement(b,s(s({ref:a},l),{},{components:n})):r.a.createElement(b,s({ref:a},l))}));function b(e,a){var n=arguments,t=a&amp;&amp;a.mdxType;if("string"==typeof e||t){var o=n.length,i=new Array(o);i[0]=g;var s={};for(var c in a)hasOwnProperty.call(a,c)&amp;&amp;(s[c]=a[c]);s.originalType=e,s.mdxType="string"==typeof e?e:t,i[1]=s;for(var l=2;l&lt;o;l++)i[l]=n[l];return r.a.createElement.apply(null,i)}return r.a.createElement.apply(null,n)}g.displayName="MDXCreateElement"},694:function(e,a,n){"use strict";n.r(a),n.d(a,"frontMatter",(function(){return i})),n.d(a,"metadata",(function(){return s})),n.d(a,"rightToc",(function(){return c})),n.d(a,"default",(function(){return u}));var t=n(2),r=n(6),o=(n(0),n(1629)),i={id:"language-support",sidebar_label:"Language Support",title:"Language Support",abstract:"You can use Rasa to build assistants in any language you want."},s={unversionedId:"language-support",id:"language-support",isDocsHomePage:!1,title:"Language Support",description:"Your Rasa assistant can be used on training data in any language.",source:"@site/docs/language-support.mdx",slug:"/language-support",permalink:"/docs/rasa/next/language-support",editUrl:"https://github.com/rasahq/rasa/edit/main/docs/docs/language-support.mdx",version:"current",lastUpdatedBy:"Anthony De Guzman",lastUpdatedAt:1733510274,sidebar_label:"Language Support",sidebar:"default",previous:{title:"Training Data Importers",permalink:"/docs/rasa/next/training-data-importers"},next:{title:"Graph Recipe",permalink:"/docs/rasa/next/graph-recipe"}},c=[{value:"Training a Model in Any Languages",id:"training-a-model-in-any-languages",children:[]},{value:"Using Pre-trained Language Models",id:"using-pre-trained-language-models",children:[{value:"spaCy",id:"spacy",children:[]},{value:"MITIE",id:"mitie",children:[]}]}],l={rightToc:c};function u(e){var a=e.components,n=Object(r.a)(e,["components"]);return Object(o.b)("wrapper",Object(t.a)({},l,n,{components:a,mdxType:"MDXLayout"}),Object(o.b)("p",null,"Your Rasa assistant can be used on training data in ",Object(o.b)("strong",{parentName:"p"},"any language"),".\nIf there are no word embeddings for your language, you can train your featurizers\nfrom scratch with the data you provide."),Object(o.b)("p",null,"In addition, we also support pre-trained word embeddings such as spaCy. For information on\nwhat pipeline is best for your use case, check out ",Object(o.b)("a",Object(t.a)({parentName:"p"},{href:"/docs/rasa/next/tuning-your-model#how-to-choose-a-pipeline"}),"choosing a pipeline"),"."),Object(o.b)("h2",{id:"training-a-model-in-any-languages"},"Training a Model in Any Languages"),Object(o.b)("p",null,"The following pipeline can be used to train models in whitespace tokenizable languages:"),Object(o.b)("pre",null,Object(o.b)("code",Object(t.a)({parentName:"pre"},{className:"language-yaml-rasa",metastring:"(docs/sources/data/configs_for_docs/default_config.yml)","(docs/sources/data/configs_for_docs/default_config.yml)":!0}),'assistant_id: default_config_bot\nlanguage: "fr"  # your two-letter language code\n\npipeline:\n  - name: WhitespaceTokenizer\n  - name: RegexFeaturizer\n  - name: LexicalSyntacticFeaturizer\n  - name: CountVectorsFeaturizer\n  - name: CountVectorsFeaturizer\n    analyzer: "char_wb"\n    min_ngram: 1\n    max_ngram: 4\n  - name: DIETClassifier\n    epochs: 100\n  - name: EntitySynonymMapper\n  - name: ResponseSelector\n    epochs: 100\n')),Object(o.b)("p",null,"To train a Rasa model in your preferred language, define the pipeline in your ",Object(o.b)("inlineCode",{parentName:"p"},"config.yml"),".\nAfter you define the pipeline and generate some ",Object(o.b)("a",Object(t.a)({parentName:"p"},{href:"/docs/rasa/next/training-data-format"}),"NLU training data"),"\nin your chosen language, train the model by running the command:"),Object(o.b)("pre",null,Object(o.b)("code",Object(t.a)({parentName:"pre"},{className:"language-bash"}),"rasa train nlu\n")),Object(o.b)("p",null,"Once the training is finished, you can test your model's language skills.\nSee how your model interprets different input messages by running:"),Object(o.b)("pre",null,Object(o.b)("code",Object(t.a)({parentName:"pre"},{className:"language-bash"}),"rasa shell nlu\n")),Object(o.b)("div",{className:"admonition admonition-note alert alert--secondary"},Object(o.b)("div",Object(t.a)({parentName:"div"},{className:"admonition-heading"}),Object(o.b)("h5",{parentName:"div"},Object(o.b)("span",Object(t.a)({parentName:"h5"},{className:"admonition-icon"}),Object(o.b)("svg",Object(t.a)({parentName:"span"},{xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"}),Object(o.b)("path",Object(t.a)({parentName:"svg"},{fillRule:"evenodd",d:"M6.3 5.69a.942.942 0 0 1-.28-.7c0-.28.09-.52.28-.7.19-.18.42-.28.7-.28.28 0 .52.09.7.28.18.19.28.42.28.7 0 .28-.09.52-.28.7a1 1 0 0 1-.7.3c-.28 0-.52-.11-.7-.3zM8 7.99c-.02-.25-.11-.48-.31-.69-.2-.19-.42-.3-.69-.31H6c-.27.02-.48.13-.69.31-.2.2-.3.44-.31.69h1v3c.02.27.11.5.31.69.2.2.42.31.69.31h1c.27 0 .48-.11.69-.31.2-.19.3-.42.31-.69H8V7.98v.01zM7 2.3c-3.14 0-5.7 2.54-5.7 5.68 0 3.14 2.56 5.7 5.7 5.7s5.7-2.55 5.7-5.7c0-3.15-2.56-5.69-5.7-5.69v.01zM7 .98c3.86 0 7 3.14 7 7s-3.14 7-7 7-7-3.12-7-7 3.14-7 7-7z"})))),"note")),Object(o.b)("div",Object(t.a)({parentName:"div"},{className:"admonition-content"}),Object(o.b)("p",{parentName:"div"},"Even more so when training word embeddings from scratch, more training data will lead to a\nbetter model! If you find your model is having trouble discerning your inputs, try training\nwith more example sentences."))),Object(o.b)("h2",{id:"using-pre-trained-language-models"},"Using Pre-trained Language Models"),Object(o.b)("p",null,"If you can find them in your language, language models with pre-trained word vectors are a great way to get started with less data,\nas the word vectors are trained on large amounts of data such as Wikipedia."),Object(o.b)("h3",{id:"spacy"},"spaCy"),Object(o.b)("p",null,"With the ",Object(o.b)("a",Object(t.a)({parentName:"p"},{href:"/docs/rasa/next/components#spacynlp"}),"Pre-trained Spacy Embeddings"),", you can use spaCy's\n",Object(o.b)("a",Object(t.a)({parentName:"p"},{href:"https://spacy.io/usage/models#languages"}),"pre-trained language models")," or load fastText vectors, which are available\nfor ",Object(o.b)("a",Object(t.a)({parentName:"p"},{href:"https://github.com/facebookresearch/fastText/blob/master/docs/crawl-vectors.md"}),"hundreds of languages"),". If you want\nto incorporate a custom model you've found into spaCy, check out their page on\n",Object(o.b)("a",Object(t.a)({parentName:"p"},{href:"https://spacy.io/docs/usage/adding-languages"}),"adding languages"),". As described in the documentation, you need to\nregister your language model and link it to the language identifier, which will allow Rasa to load and use your new language\nby passing in your language identifier as the ",Object(o.b)("inlineCode",{parentName:"p"},"language")," option."),Object(o.b)("h3",{id:"mitie"},"MITIE"),Object(o.b)("p",null,"You can also pre-train your own word vectors from a language corpus using ",Object(o.b)("a",Object(t.a)({parentName:"p"},{href:"/docs/rasa/next/components#mitienlp"}),"MITIE"),". To do so:"),Object(o.b)("ol",null,Object(o.b)("li",{parentName:"ol"},"Get a clean language corpus (a Wikipedia dump works) as a set of text files."),Object(o.b)("li",{parentName:"ol"},"Build and run ",Object(o.b)("inlineCode",{parentName:"li"},"MITIE Wordrep Tool"),"_ on your corpus.\nThis can take several hours/days depending on your dataset and your workstation.\nYou'll need something like 128GB of RAM for wordrep to run -- yes, that's a lot: try to extend your swap."),Object(o.b)("li",{parentName:"ol"},"Set the path of your new ",Object(o.b)("inlineCode",{parentName:"li"},"total_word_feature_extractor.dat")," as the ",Object(o.b)("inlineCode",{parentName:"li"},"model")," parameter in your\n",Object(o.b)("a",Object(t.a)({parentName:"li"},{href:"/docs/rasa/next/components#mitienlp"}),"configuration"),".")),Object(o.b)("p",null,"For a full example of how to train MITIE word vectors, check out\n",Object(o.b)("a",Object(t.a)({parentName:"p"},{href:"http://www.crownpku.com/2017/07/27/%E7%94%A8Rasa_NLU%E6%9E%84%E5%BB%BA%E8%87%AA%E5%B7%B1%E7%9A%84%E4%B8%AD%E6%96%87NLU%E7%B3%BB%E7%BB%9F.html"}),"this blogpost"),"\nof creating a MITIE model from a Chinese Wikipedia dump."))}u.isMDXComponent=!0}}]);</pre></body></html>