{"id":33464,"date":"2025-10-03T15:14:36","date_gmt":"2025-10-03T13:14:36","guid":{"rendered":"https:\/\/www.azzurrodigitale.com\/?p=33464"},"modified":"2026-04-07T12:10:52","modified_gmt":"2026-04-07T10:10:52","slug":"one-login-endless-possibilities-the-single-sign-on-revolution-2","status":"publish","type":"post","link":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/","title":{"rendered":"LLMs and RAG \u2013 A Glimpse into the Future of Generative Artificial Intelligence"},"content":{"rendered":"<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_82_2 ez-toc-wrap-left counter-hierarchy ez-toc-counter ez-toc-custom ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">Indice dei contenuti<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #21bdff;color:#21bdff\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #21bdff;color:#21bdff\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul 
class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#Why_LLM_and_RAG_are_a_game_changer_for_businesses\" >Why LLM and RAG are a game changer for businesses<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#What_is_an_LLM\" >What is an LLM?<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#Limitations_of_LLMs_updating_and_cultural_bias\" >Limitations of LLMs: updating and cultural bias<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#RAGs_Explained\" >RAGs, Explained<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#How_does_it_work_in_practice\" >How does it work in practice?<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#Why_we_talk_about_tokens_chunks_vectors_and_similarity_search\" >Why we talk about tokens, chunks, vectors and similarity search<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-7\" 
href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#Operating_flows\" >Operating flows<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#Wepladoo_for_Fratelli_Poli\" >Wepladoo for Fratelli Poli<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#Deep_Tech_for_Insiders\" >Deep Tech for Insiders<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-10\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#How_does_a_RAG_work\" >How does a RAG work?<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-11\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#Communications_to_LLM\" >Communications to LLM<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-12\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#QuestionAnswer\" >Question\/Answer<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-13\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#RAG_DB_connected\" >RAG DB connected<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-14\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#Developer_Glossary\" >Developer Glossary<\/a><\/li><\/ul><\/nav><\/div>\n\n<h4 
class=\"wp-block-heading\">Written by Mattia Gottardello &#8211; Full-stack Developer at AzzurroDigitale<\/h4>\n\n<p><em>Large Language Models and RAG systems are transforming the way companies use artificial intelligence. These technologies enable the automation of customer support with accurate responses, the intelligent management of business documents, and the querying of databases in natural language. This article explores how these tools work, the concrete benefits for businesses, and the technical limitations to keep in mind: a strategic guide to integrating generative AI into your company.<\/em><\/p>\n\n<div style=\"height:60px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"Why_LLM_and_RAG_are_a_game_changer_for_businesses\"><\/span>Why LLM and RAG are a game changer for businesses<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p><strong>LLM<\/strong> (Large Language Model) and <strong>RAG<\/strong> (Retrieval-Augmented Generation) technologies are revolutionising the way companies manage, leverage and share their knowledge. In a context where the volume of data and documents grows every day, quickly finding the right information becomes an increasingly complex challenge. This is where <a href=\"https:\/\/www.azzurrodigitale.com\/oltre-lhype-cosa-sono-gli-llm-e-perche-cambiano-le-regole-del-gioco\/\">LLMs<\/a> and, above all, RAG systems come into play.<\/p>\n\n<p>Imagine a company that needs to provide technical support to customers: thanks to a RAG system, the chatbot can consult manuals, FAQs and internal documentation in real time and respond accurately even to very specific questions, drastically reducing waiting times and improving customer satisfaction. 
Or think of a team of consultants who, instead of searching through dozens of files and regulations, can obtain accurate and up-to-date answers simply by asking a question in natural language. <\/p>\n\n<p>These technologies also enable:<\/p>\n\n<ul class=\"wp-block-list\">\n<li><strong>Automate repetitive processes<\/strong> such as compiling reports, searching for data or generating documents<\/li>\n\n\n\n<li><strong>Reduce the risk of errors <\/strong>with answers based on verified and up-to-date sources<\/li>\n\n\n\n<li><strong>Customise the user experience<\/strong> by drawing on both public data and confidential company information<\/li>\n\n\n\n<li><strong>Integrate structured and unstructured data<\/strong>: RAGs can combine information from databases and documents, offering a comprehensive and cross-cutting view<\/li>\n<\/ul>\n\n<p><mark class=\"has-inline-color has-black-color\">and all this without having to retrain the model every time the data changes.<\/mark><\/p>\n\n<p>Of course, adopting these solutions requires careful consideration: technical limitations (such as token, chunk and vector management), infrastructure costs and the quality of the sources used must all be evaluated. However, the benefits in terms of efficiency, speed of access to information and quality of service are such that LLM and RAG are a strategic choice for all companies that want to innovate and remain competitive. 
<\/p>\n\n<p>In this article, you will discover how these technologies work, what their limitations are, their most common use cases, and the challenges you will face in successfully adopting them in your business.<\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"What_is_an_LLM\"><\/span>What is an LLM?<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p>A <strong>Large Language Model<\/strong> is an artificial intelligence trained on enormous amounts of text to understand and generate natural language. These models, such as GPT-4, are capable of answering questions, writing texts, translating languages, and much more. <\/p>\n\n<p><strong>Examples of everyday use:<\/strong><\/p>\n\n<ul class=\"wp-block-list\">\n<li>Smart chatbots (e.g. virtual assistants)<\/li>\n\n\n\n<li>Automatic generation of emails or documents<\/li>\n\n\n\n<li>Automatic translation<\/li>\n<\/ul>\n\n<p>Training and operating these models requires enormous computational resources, such as powerful GPUs (Graphics Processing Units) and specialised servers. Even everyday use (inference<a id=\"5d1e1054-f2c0-4ba0-8371-2ba613582595-link\" href=\"#5d1e1054-f2c0-4ba0-8371-2ba613582595\">1<\/a>) can require a lot of computing power, which is why they are often run on the cloud. <\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"Limitations_of_LLMs_updating_and_cultural_bias\"><\/span><strong>Limitations of LLMs: updating and cultural bias<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p>One of the main limitations of LLMs is that <strong>their knowledge is limited to the date on which they were trained<\/strong>. 
This means that everything the model \u201cknows\u201d comes from the data collected up to that point: events, scientific discoveries, social or technological changes that occurred after the end of training are not present in its \u201cmemory\u201d. For example, an LLM trained in 2023 will not know about events that occurred in 2024 or 2025.  <\/p>\n\n<p>Furthermore, LLMs <strong>reflect the culture<\/strong>, <strong>values<\/strong>, and <strong>biases <\/strong>present in the data on which they were trained. If the source texts are predominantly in one language, from a certain geographical area, or represent a certain way of thinking, the model will tend to reproduce those same perspectives. This can lead to responses that are not neutral or that do not take cultural and social diversity into account.  <\/p>\n\n<p>For these reasons, it is important to use LLMs critically, be aware of their limitations and, when necessary, integrate them with technologies such as RAGs that allow access to up-to-date and more specific information.<\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\" id=\"&#x1F3E2;-Benefici-per-l&#x2019;Azienda\"><span class=\"ez-toc-section\" id=\"RAGs_Explained\"><\/span><strong>RAGs, Explained<\/strong><span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p><strong>Retrieval-Augmented Generation<\/strong> is a technique that combines the power of LLMs with the ability to retrieve up-to-date information from external sources (databases, documents, the web). 
In practice, the model does not rely solely on what it has learned during training, but can \u201cgo out and find\u201d fresh and relevant information.<br \/>In this way: <\/p>\n\n<ul class=\"wp-block-list\">\n<li><strong>Answers can always be up-to-date<\/strong> and grounded in real data<\/li>\n\n\n\n<li>The <strong>risk of fabricated responses<\/strong> is reduced because the model is grounded in concrete sources<\/li>\n\n\n\n<li>You can <strong>customise responses<\/strong> using company or private data<\/li>\n\n\n\n<li>LLMs become increasingly useful, evolving into flexible tools better suited to professional and fast-changing environments.<\/li>\n<\/ul>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"How_does_it_work_in_practice\"><\/span>How does it work in practice?<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p>Here\u2019s what happens when you ask a RAG system a question:<\/p>\n\n<ol start=\"1\" class=\"wp-block-list\">\n<li>It searches its archives or the internet for the documents most relevant to your request<\/li>\n\n\n\n<li>It passes this information to the language model<\/li>\n\n\n\n<li>It generates a more accurate and up-to-date response.<\/li>\n<\/ol>\n\n<div style=\"height:20px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<p>Examples of RAG usage:<\/p>\n\n<ul class=\"wp-block-list\">\n<li><strong>Advanced search engines<\/strong> (e.g. 
Perplexity, Bing Chat): they respond by citing up-to-date sources<\/li>\n\n\n\n<li><strong>Customer support<\/strong>: chatbots that respond to questions about products and services using internal documentation<\/li>\n\n\n\n<li><strong>Business tools<\/strong>: intelligent search for documents, policies, manuals.<\/li>\n<\/ul>\n\n<div style=\"height:20px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<p>The quality of the responses depends directly on the quality and relevance of the sources to which the model has access: the more reliable and consistent with the request the retrieved documents are, the more accurate the output will be. For this reason, it is essential to integrate the system with the right sources, which can be company databases, internal knowledge bases or content from the web. <\/p>\n\n<p>When we talk about systems that \u201cread\u201d and \u201cunderstand\u201d large amounts of text, we must remember that machines do not interpret words in the same way we do. In order to process language, computers must transform text into a form that they can understand and manipulate. This is where concepts such as tokens, chunks, vectors and similarity searches come into play: tools that allow RAGs to efficiently divide, represent and compare text in order to identify the most relevant information and generate pertinent responses.  <\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"Why_we_talk_about_tokens_chunks_vectors_and_similarity_search\"><\/span>Why we talk about tokens, chunks, vectors and similarity search<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p>RAGs are systems that combine the ability to generate text (as large language models do) with the ability to retrieve information from a collection of documents or data. This allows for more up-to-date, accurate, and personalised responses. 
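<\/p>

<p>The retrieve-then-generate loop described above can be sketched in a few lines of Python. This is a deliberately minimal illustration, not production code: the toy corpus, the keyword-overlap retriever and the <code>fake_llm<\/code> placeholder are all hypothetical stand-ins for a real vector index and a real LLM API.<\/p>

```python
def retrieve(query, corpus, top_k=2):
    """Rank documents by naive keyword overlap with the query
    (a stand-in for a real vector similarity search)."""
    q_words = set(query.lower().split())
    return sorted(
        corpus,
        key=lambda doc: len(q_words & set(doc.lower().split())),
        reverse=True,
    )[:top_k]

def fake_llm(prompt):
    """Hypothetical placeholder for a call to a real LLM API."""
    return f"Answer based on: {prompt}"

def rag_answer(query, corpus):
    # 1) retrieve relevant documents, 2) pass them as context, 3) generate
    context = "\n".join(retrieve(query, corpus))
    prompt = f"Context:\n{context}\n\nQuestion: {query}"
    return fake_llm(prompt)

corpus = [
    "The warranty on model X lasts 24 months.",
    "Support is available Monday to Friday.",
    "The office cafeteria opens at 8 am.",
]
print(rag_answer("How long is the warranty on model X?", corpus))
```

<p>In a real system the retriever would query a vector database and the generator would be an actual LLM call, but the structure of the flow stays the same.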
<\/p>\n\n<h3 class=\"wp-block-heading\" id=\"Come-funziona-un-RAG-in-pratica?\">How does a RAG work in practice?<\/h3>\n\n<ol start=\"1\" class=\"wp-block-list\">\n<li><strong>Receipt of the question<\/strong>: The user writes a question or request<\/li>\n\n\n\n<li><strong>Information retrieval<\/strong>: The system searches its archives (databases, documents, web) for the most relevant texts to respond<\/li>\n\n\n\n<li><strong>Response generation<\/strong>: The language model uses the retrieved information to construct a clear and comprehensive response<\/li>\n<\/ol>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<figure class=\"wp-block-image size-large\"><img fetchpriority=\"high\" decoding=\"async\" width=\"1024\" height=\"406\" src=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/Pasted-image-20250925181718-20250925-161718-1024x406.png\" alt=\"simplified RAG chart\" class=\"wp-image-33467\" srcset=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/Pasted-image-20250925181718-20250925-161718-1024x406.png 1024w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/Pasted-image-20250925181718-20250925-161718-300x119.png 300w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/Pasted-image-20250925181718-20250925-161718-768x305.png 768w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/Pasted-image-20250925181718-20250925-161718.png 1101w\" sizes=\"(max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<p>To accomplish this, RAGs must \u201cunderstand\u201d and compare large amounts of text. <br \/>This is where technical terms come into play:<\/p>\n\n<h4 class=\"wp-block-heading\">Tokens<\/h4>\n\n<p>LLMs do not read text like a person, but divide it into <strong>tokens<\/strong>, i.e. small units (words, parts of words, punctuation). 
This is done in order to process text more efficiently and uniformly. Each model has a maximum limit on the number of tokens it can handle at one time: if the question or documents are too long, they must be truncated or split. Tokens are also the fundamental unit on which some AI service providers base billing for their paid models. <\/p>\n\n<h4 class=\"wp-block-heading\">Chunks<\/h4>\n\n<p>To comply with the token limit, long texts are divided into <strong>chunks<\/strong>: small pieces of text, each of which can be processed by the model. This process is essential to allow the system to \u201cdigest\u201d even very long documents, but it can result in the loss of some context if important information is spread across multiple chunks. <\/p>\n\n<h4 class=\"wp-block-heading\">Vectors<\/h4>\n\n<p>Each chunk of text is transformed into a <strong>vector<\/strong>, i.e. a sequence of numbers representing the meaning of that piece of text. This transformation allows the system to compare different texts mathematically, even if they use different words to express similar concepts. 
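<\/p>

<p>As a concrete illustration of chunking, here is a minimal word-based splitter with optional overlap between consecutive chunks, a common way to limit the context loss mentioned above. Words are used as a rough proxy for tokens; the chunk size and overlap values are illustrative, not prescriptive.<\/p>

```python
def chunk_text(text, chunk_size=50, overlap=10):
    """Split text into chunks of at most `chunk_size` words,
    each sharing `overlap` words with the previous chunk."""
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size - overlap):
        chunks.append(" ".join(words[start:start + chunk_size]))
        if start + chunk_size >= len(words):
            break  # the last chunk already covers the end of the text
    return chunks

# A 120-word dummy document yields three overlapping chunks
doc = " ".join(f"w{i}" for i in range(120))
print(len(chunk_text(doc)))  # prints 3
```

<p>Real pipelines usually count model tokens rather than words, but the sliding-window idea is the same.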
<\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<figure class=\"wp-block-image size-full\"><img decoding=\"async\" width=\"976\" height=\"355\" src=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-2-1.png\" alt=\"\" class=\"wp-image-33471\" srcset=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-2-1.png 976w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-2-1-300x109.png 300w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-2-1-768x279.png 768w\" sizes=\"(max-width: 976px) 100vw, 976px\" \/><\/figure>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h4 class=\"wp-block-heading\">Similarity search<\/h4>\n\n<p>Once all the chunks have been transformed into vectors, the system can use <strong>similarity search<\/strong> to find, among all the available pieces of text, those most similar to the user&#8217;s query. In practice, the system searches for vectors that most \u201cresemble\u201d the query vector, i.e. that represent similar topics or concepts. 
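<\/p>

<p>A similarity search of this kind can be sketched with cosine similarity over toy vectors. Real embeddings have hundreds or thousands of dimensions and come from an embedding model; the three-dimensional vectors, chunk names, threshold and top-k values below are purely illustrative.<\/p>

```python
import math

def cosine(a, b):
    """Cosine similarity between two vectors of equal length."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    return dot / (norm_a * norm_b)

def similarity_search(query_vec, index, threshold=0.5, top_k=2):
    """Return the chunk ids whose vectors score above `threshold`,
    best first, capped at `top_k` results."""
    scored = [(cosine(query_vec, vec), cid) for cid, vec in index.items()]
    scored = [(score, cid) for score, cid in scored if score >= threshold]
    scored.sort(reverse=True)
    return [cid for _, cid in scored[:top_k]]

# Toy 3-dimensional "embeddings" keyed by chunk id
index = {
    "chunk-warranty": [0.9, 0.1, 0.0],
    "chunk-hours":    [0.1, 0.9, 0.1],
    "chunk-canteen":  [0.0, 0.2, 0.9],
}
print(similarity_search([1.0, 0.2, 0.0], index))  # only the warranty chunk passes the threshold
```

<p>Every vector receives a score; the threshold and the top-k cap decide which chunks actually reach the generation step.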
<\/p>\n\n<h4 class=\"wp-block-heading\"><strong>Why is all this important?<\/strong><\/h4>\n\n<ul class=\"wp-block-list\">\n<li><strong>Efficiency<\/strong>: Dividing text into chunks and working with vectors allows large amounts of data to be handled quickly<\/li>\n\n\n\n<li><strong>Accuracy<\/strong>: Similarity search helps find the most relevant information, even if expressed in different ways<\/li>\n\n\n\n<li><strong>Technical limitations<\/strong>: Limits on tokens and vector sizes depend on the model used and affect the amount of text that can be analysed and the quality of responses<\/li>\n<\/ul>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"Operating_flows\"><\/span>Operating flows<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p>These paragraphs serve two purposes:<\/p>\n\n<ul class=\"wp-block-list\">\n<li><strong>Support data entry<\/strong>: explain how data is transformed step by step: from the original FILES, which are divided into smaller CHUNKS, to numerical representation using VECTORS, ready to be used by the system<\/li>\n\n\n\n<li><strong>Question\/answer<\/strong>: explain how the system, starting from a question, identifies the relevant support files and generates an answer that is as accurate as possible and based on the data actually retrieved for the question asked.<\/li>\n<\/ul>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h3 class=\"wp-block-heading\"><strong>Support data entry flow<\/strong><\/h3>\n\n<p>We start by retrieving files that the customer can independently place in a remote \u2018folder\u2019. Once the system detects the arrival of a new file, a dedicated component retrieves it. The new document is registered and checks on file size and format are carried out. 
This is necessary because we need to know exactly what we are working with and how to handle it.<br \/>Once we have ensured that the file complies with our constraints, we can start dividing it into chunks according to our chosen methodology. The final step involves conversion to a vector.<br \/>At the end of the flow, it will therefore be possible to know, starting from a vector, which chunk it belonged to and, in turn, which file that chunk was generated from. <\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<figure class=\"wp-block-image size-full\"><img decoding=\"async\" width=\"558\" height=\"297\" src=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-3.png\" alt=\"support data entry flow chart\" class=\"wp-image-33473\" srcset=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-3.png 558w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-3-300x160.png 300w\" sizes=\"(max-width: 558px) 100vw, 558px\" \/><\/figure>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h3 class=\"wp-block-heading\">Question\/answer flow<\/h3>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"765\" height=\"475\" src=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-4.png\" alt=\"question\/answer flow chart\" class=\"wp-image-33475\" srcset=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-4.png 765w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-4-300x186.png 300w\" sizes=\"(max-width: 765px) 100vw, 765px\" \/><\/figure>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<p>Now comes the moment when the user submits the query: it is converted into a vector and then a similarity search is performed against the vectors present in our system. Obviously, every vector will return a result, because we are measuring a \u201cdistance\u201d between two \u201cideas\u201d, and that distance always has a value. It is therefore necessary to set a minimum threshold for a vector to be included among the candidates for generating the response. The constraint can be imposed both as a minimum score for inclusion and as a maximum number of vectors to include. <\/p>\n\n<p>Once the vectors contributing to the generation of the response have been identified, the process becomes straightforward, as each vector is linked to the chunk that generated it. All the selected chunks are retrieved and sent together with the question to the LLM, asking it to generate a response based on the question and the information we have provided.<\/p>\n\n<p>It is possible (though not advisable) to feed additional files into the response generation system that set, or attempt to set, guidelines for the LLM&#8217;s response.<br \/>We have named these files enrichment files.<\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\" id=\"Wepladoo-per-Fratelli-Poli\"><span class=\"ez-toc-section\" id=\"Wepladoo_for_Fratelli_Poli\"><\/span>Wepladoo for Fratelli Poli<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p>In the business case addressed by our team (if you want to learn more, you can find it <a href=\"https:\/\/www.azzurrodigitale.com\/f-lli-poli-e-linnovazione-al-servizio-del-know-how-industriale\/\">here<\/a>), the customer asked us to <strong>develop a chatbot capable of answering specific questions<\/strong> using two main sources of information: company documents made available by the customer through a remote folder, and data stored directly in the Wepladoo application database. 
The goal was to enable users to obtain accurate, contextualised answers without having to manually consult either the documents or the database.<br \/>What better use for a RAG? <\/p>\n\n<p>We therefore implemented a web application that performs the operations described above, and with it we achieved this objective.<\/p>\n\n<p>Dividing the operations allowed us to share responsibilities and processing loads, but made the challenge more complex in terms of orchestrating the flows.<br \/>We therefore <strong>analysed the various LLMs<\/strong> and decided to rely on Gemini for one simple reason: greater flexibility in the size and limits of the text it can handle, both for the questions and for the amount of information that can be used to ground the answers in supporting data.<br \/>We also set up a mechanism for <strong>automatic file retrieval<\/strong>, so that the system is always up to date with what the customer uploads to our remote \u2018folder\u2019.<\/p>\n\n<p><strong>Prompt engineering<\/strong> played an important role. Since we had to provide limits and guidelines for generating responses, we developed prompts (in short: small pieces of guiding text) that are always attached to questions, and the quality of these prompts directly determines the quality of the generated responses.<\/p>\n\n<p>An <strong>additional challenge<\/strong> was that the customer wanted to obtain information about the Wepladoo application directly from the chatbot. So we devised a mechanism that works much like a normal RAG but also retrieves data (directly from the database) that is current at the time of the query to provide a response to the user. 
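<\/p>

<p>To make the idea concrete, here is a rough sketch of how such a system might assemble a single prompt from retrieved document chunks, live database values and guideline text before calling the LLM. The function name, the data and the prompt layout are hypothetical illustrations, not Wepladoo&#8217;s actual implementation.<\/p>

```python
def build_prompt(question, doc_chunks, db_rows, guidelines):
    """Combine guideline text, retrieved chunks and fresh database
    rows into one prompt for the LLM (all inputs illustrative)."""
    context = "\n".join(f"- {chunk}" for chunk in doc_chunks)
    data = "\n".join(f"- {k}: {v}" for row in db_rows for k, v in row.items())
    return (
        f"{guidelines}\n\n"
        f"Documentation excerpts:\n{context}\n\n"
        f"Live application data:\n{data}\n\n"
        f"Question: {question}\n"
        "Answer using only the information above."
    )

prompt = build_prompt(
    question="How many open work orders are there?",
    doc_chunks=["Work orders are created from the dashboard."],
    db_rows=[{"open_work_orders": 7}],  # would be fetched at query time
    guidelines="Answer concisely and cite the source of each fact.",
)
print(prompt)
```

<p>Because the database rows are fetched at query time, the answer reflects the current state of the application rather than a snapshot frozen at indexing time.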
<\/p>\n\n<p>The system, integrated with Wepladoo, has enabled operators to access information in real time from various company documents and from data in the database, greatly simplifying the search for and retrieval of answers.<\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<hr class=\"wp-block-separator has-text-color has-cyan-bluish-gray-color has-alpha-channel-opacity has-cyan-bluish-gray-background-color has-background is-style-default\" style=\"margin-top:300;margin-bottom:300\" \/>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading has-text-color has-link-color has-large-font-size wp-elements-fc012bf475b11b6cf9396693e942f31d\" style=\"color:#01bdff\"><span class=\"ez-toc-section\" id=\"Deep_Tech_for_Insiders\"><\/span>Deep Tech for Insiders <span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<div style=\"height:20px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"How_does_a_RAG_work\"><\/span>How does a RAG work?<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p>A typical RAG system consists of two main parts:<\/p>\n\n<ol start=\"1\" class=\"wp-block-list\">\n<li><strong>Retriever<\/strong>: Given an input (e.g., a query), it searches for the most relevant documents in a database or vector index (often using embedding techniques such as FAISS, Pinecone, or Elasticsearch)<\/li>\n\n\n\n<li><strong>Generator<\/strong>: The LLM takes the retrieved documents and uses them as context to generate an accurate and informed response.<\/li>\n<\/ol>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h3 class=\"wp-block-heading\" id=\"Divisione-dei-compiti\">Division of tasks<\/h3>\n\n<p>The development of a RAG can be carried out using a monolithic infrastructure or micro-services, depending on requirements. 
The components involved, viewed as methods or as individual micro-services, are: <\/p>\n\n<ul class=\"wp-block-list\">\n<li><strong>File ingestion<\/strong>: retrieving and managing updated data sources that will then be used as the basis for generating responses<\/li>\n\n\n\n<li><strong>File chunking<\/strong>: dividing the retrieved files so that they fall within the limits described above for the various LLMs<\/li>\n\n\n\n<li><strong>Conversion to vector format<\/strong>: converting the generated file fragments to vector format and cataloguing these fragments according to the file to which they belong<\/li>\n\n\n\n<li><strong>Question-answer flow management<\/strong>: once the vectors, chunks and files have been prepared, the actual functioning of the RAG must be put into practice, i.e. the implementation of the <strong>retrieval system<\/strong> and the use of the <strong>generation system<\/strong>.<\/li>\n<\/ul>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"754\" height=\"402\" src=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-5.png\" alt=\"communications map\" class=\"wp-image-33477\" style=\"width:755px;height:auto\" srcset=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-5.png 754w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-5-300x160.png 300w\" sizes=\"(max-width: 754px) 100vw, 754px\" \/><\/figure>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"540\" src=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-6-1024x540.png\" alt=\"Wepladoo chart\" class=\"wp-image-33479\" srcset=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-6-1024x540.png 1024w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-6-300x158.png 300w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-6-768x405.png 768w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-6.png 1411w\" sizes=\"(max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h4 class=\"wp-block-heading\" id=\"Passo-1-&#x2013;-Redirect-iniziale\">Ingestion<\/h4>\n\n<p>This process must provide for the <strong>remote or fixed-location retrieval<\/strong> of one or more files, their initial cataloguing within the system, and the management of the various supported formats.<br \/>The decision on which formats to support will affect the <strong>chunking process<\/strong>, which is the first point at which the file content is actually accessed. The system must be able to recognise which formats are allowed and which are not, in order to prevent unsupported files from entering the system and rendering the entire application unusable. 
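<\/p>

<p>The format and size checks described above can be sketched as a small validation gate that runs before chunking. The allowed extensions and the size cap below are illustrative assumptions, not the actual limits of any specific system.<\/p>

```python
from pathlib import Path

ALLOWED_FORMATS = {".pdf", ".docx", ".txt"}  # illustrative whitelist
MAX_SIZE_BYTES = 20 * 1024 * 1024            # illustrative 20 MB cap

def validate_file(path, size_bytes):
    """Reject unsupported formats and oversized files before they
    enter the chunking pipeline; returns (ok, reason)."""
    suffix = Path(path).suffix.lower()
    if suffix not in ALLOWED_FORMATS:
        return False, f"unsupported format: {suffix or 'none'}"
    if size_bytes > MAX_SIZE_BYTES:
        return False, f"file too large: {size_bytes} bytes"
    return True, "ok"

print(validate_file("manual_v2.PDF", 1_000_000))  # accepted despite the upper-case extension
print(validate_file("video.mp4", 1_000_000))      # rejected: unsupported format
```

<p>Rejecting a file at this gate, with an explicit reason, is what keeps a single unsupported upload from disrupting the rest of the pipeline.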
<\/p>\n\n<p>The file census is also useful for supporting features such as <strong>updating files uploaded at different times<\/strong>, removing obsolete files that no longer contribute to generating responses, and checking which data the generated responses are based on (citing sources).<\/p>\n\n<p>The entire ingestion process also relies on a <strong>queue system<\/strong>, which receives updates to the remote folder in an orderly manner, prevents system congestion under bulk uploads, and streamlines the entire flow from census to vectorisation.<br \/>Next, we will look at the call flow needed for a correct implementation of the system.<\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"873\" height=\"817\" src=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-7.png\" alt=\"call flow chart for the correct implementation of the system\" class=\"wp-image-33481\" srcset=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-7.png 873w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-7-300x281.png 300w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-7-768x719.png 768w\" sizes=\"(max-width: 873px) 100vw, 873px\" \/><\/figure>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h4 class=\"wp-block-heading\" id=\"Passo-2-&#x2013;-Autenticazione-e-verifica-MFA\">Chunking<\/h4>\n\n<p>File chunking is the second step towards fully processing new information. 
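As a minimal sketch of fixed-size chunking with overlap, the function below counts whitespace-separated words for simplicity; a real system would count model tokens using the provider's tokenizer:

```python
def chunk_with_overlap(text: str, chunk_size: int = 256, overlap: int = 50) -> list[str]:
    """Split `text` into chunks of `chunk_size` words, each sharing
    `overlap` words with the previous chunk. Word-based for simplicity;
    production systems count model tokens instead."""
    words = text.split()
    step = chunk_size - overlap
    chunks, i = [], 0
    while i < len(words):
        chunks.append(" ".join(words[i:i + chunk_size]))
        if i + chunk_size >= len(words):  # last chunk reaches the end of the text
            break
        i += step
    return chunks

print(chunk_with_overlap("one two three four five six seven", chunk_size=4, overlap=2))
# ['one two three four', 'three four five six', 'five six seven']
```

The overlap ensures that a sentence cut at a chunk boundary still appears whole in at least one chunk.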
Given the limits of the underlying language model and the differences between chunking methods, you must decide how to split the files that have been brought into the system.<br \/>In short: how much text should each chunk contain? <\/p>\n\n<p>There are several chunking methods, each with its own pros and cons:<\/p>\n\n<ol class=\"wp-block-list\">\n<li><strong>Token-based chunking<\/strong>: the text is divided into chunks containing a fixed number of tokens (e.g. 256 or 512 tokens per chunk)<\/li>\n\n\n\n<li><strong>Chunking based on paragraphs or sentences<\/strong>: the text is divided along the natural boundaries of sentences or paragraphs<\/li>\n\n\n\n<li><strong>Chunking with overlap<\/strong>: each new chunk shares part of the text with the previous chunk (e.g., 50 overlapping tokens)<\/li>\n\n\n\n<li><strong>Semantic chunking<\/strong>: the text is divided according to meaning, automatically identifying points where the topic changes or a concept ends<\/li>\n\n\n\n<li><strong>Customised chunking<\/strong>: chunking is adapted to the specific requirements of the application (e.g. sections of manuals, chapters of books, database entries).<\/li>\n<\/ol>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h4 class=\"wp-block-heading\" id=\"Conversione-in-vettore\">Conversion to vector<\/h4>\n\n<p>Once the chunks that comply with the constraints imposed by the LLM in use have been generated, you can move on to <strong>generating the vectors<\/strong>.<br \/>The vectors are then saved and linked both to the chunk from which they were generated and to the file to which that chunk belongs. 
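A sketch of how each vector can be linked back to its chunk and source file; the record type, field names, and sample values are illustrative assumptions, not the article's actual schema:

```python
from dataclasses import dataclass

@dataclass
class VectorRecord:
    vector: list[float]   # embedding of the chunk, produced by the model in use
    chunk_text: str       # the chunk the vector was generated from
    source_file: str      # the file the chunk belongs to

# Store records so that retrieved chunks can cite their sources.
store = [
    VectorRecord([0.12, -0.43, 0.88], "Chapter 1: safety procedures ...", "manual.pdf"),
    VectorRecord([0.05, 0.61, -0.20], "Warranty terms ...", "warranty.docx"),
]
sources = sorted({r.source_file for r in store})
print(sources)  # ['manual.pdf', 'warranty.docx']
```

Keeping both back-references on every record is what later makes source citation possible.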
This will allow you to obtain the sources from which a given response originated.<br \/>The conversion to vector format is mainly used to perform <strong>similarity searches<\/strong>, which allow the extraction of the chunks most similar to the user&#8217;s query, thereby significantly increasing the likelihood of obtaining a relevant response to the query submitted to the system.<br \/>To perform a similarity search, the query must also be converted into a vector, which means that there will also be a character limit for queries.<\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h4 class=\"wp-block-heading\" id=\"Tecniche-di-similarity-search\">Similarity search techniques<\/h4>\n\n<p><strong>Similarity search <\/strong>is the process that allows RAG systems to find, among many pieces of text (chunks), those most similar to the user&#8217;s query. There are several techniques for comparing vectors representing texts, each with advantages and limitations: <\/p>\n\n<ol class=\"wp-block-list\">\n<li>Cosine Similarity<\/li>\n\n\n\n<li>Euclidean distance<\/li>\n\n\n\n<li>Manhattan distance (or L1)<\/li>\n\n\n\n<li>Approximate Nearest Neighbor (ANN)<\/li>\n<\/ol>\n\n<p>There are also more sophisticated methods, such as<strong> similarity based on neural networks <\/strong>or the <strong>use of hashing <\/strong>to reduce the complexity of calculations. These techniques are chosen based on the requirements for accuracy, speed and available resources. 
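The first technique, cosine similarity, can be sketched with the standard library alone; the vectors and chunk names below are invented for illustration:

```python
import math

def cosine_similarity(a: list[float], b: list[float]) -> float:
    """1.0 = same direction, 0.0 = orthogonal, -1.0 = opposite."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

# Pick the stored chunk whose vector points most nearly the same way as the query.
query_vec = [1.0, 0.0, 1.0]
chunk_vecs = {"chunk_a": [0.9, 0.1, 0.8], "chunk_b": [0.0, 1.0, 0.0]}
best = max(chunk_vecs, key=lambda name: cosine_similarity(query_vec, chunk_vecs[name]))
print(best)  # chunk_a
```

Because it compares direction rather than magnitude, cosine similarity is a common default for text embeddings.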
<\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"Communications_to_LLM\"><\/span>Communications to LLM<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p>To interact with LLMs, you can use libraries that expose the APIs of different AI providers through a single interface.<\/p>\n\n<p>A notable example is <strong>LangChain<\/strong>, which supports both JavaScript and Python. Since most of these technologies originate in the Python ecosystem, the Python implementation is more comprehensive and mature than the JavaScript one, which nevertheless covers all the essential functionality. <\/p>\n\n<p>Thanks to these tools, functions were developed that make it easy to generate vectors and send queries to various LLMs, using only access keys and the name of the desired model.<\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"QuestionAnswer\"><\/span>Question\/Answer<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p>Here is a brief overview of the call flow for generating a response from a question.<\/p>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"528\" src=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-8-1024x528.png\" alt=\"call flow chart for question\/answer\" class=\"wp-image-33483\" srcset=\"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-8-1024x528.png 1024w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-8-300x155.png 300w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-8-768x396.png 
768w, https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/img-articolo-8.png 1316w\" sizes=\"(max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n<div style=\"height:30px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading\" id=\"Passo-3-&#x2013;-Rilascio-dell&#x2019;Authorization-Code\"><span class=\"ez-toc-section\" id=\"RAG_DB_connected\"><\/span>RAG DB connected<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<p>This small variation on a standard RAG adds one extra step: <strong>retrieving information directly from a remote database<\/strong>.<\/p>\n\n<p>A fundamental requirement is knowledge of the <strong>database structure<\/strong>, which enables you to request the generation of a query<sup data-fn=\"8b4b326a-4739-4a08-bc59-fbd919ccfddc\" class=\"fn\">\n  <a id=\"8b4b326a-4739-4a08-bc59-fbd919ccfddc-link\" href=\"#8b4b326a-4739-4a08-bc59-fbd919ccfddc\">2<\/a>\n<\/sup> that retrieves the data used to generate the response.<br \/>The preparation flow is as follows:<\/p>\n\n<ol start=\"1\" class=\"wp-block-list\">\n<li>create a file containing (in SQL<a id=\"19de7292-4bae-4bb4-93ce-706fa13f5bc5-link\" href=\"#19de7292-4bae-4bb4-93ce-706fa13f5bc5\">\n  <sup data-fn=\"19de7292-4bae-4bb4-93ce-706fa13f5bc5\" class=\"fn\">3<\/sup>\n<\/a>)<br \/> the instructions used to create the database of interest<\/li>\n\n\n\n<li>have the system process the file as if it were a normal document file<\/li>\n\n\n\n<li>use the file without similarity search (the structure is static and contained entirely in that file, which can be treated as a single chunk)<\/li>\n<\/ol>\n\n<div style=\"height:20px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<p>The question-answer flow:<\/p>\n\n<ol start=\"1\" class=\"wp-block-list\">\n<li>Receive the user&#8217;s question<\/li>\n\n\n\n<li>Retrieve the file containing the database structure<\/li>\n\n\n\n<li>Ask the chosen LLM to generate a query combining the question with the database structure<\/li>\n\n\n\n<li>Run validation checks to block dangerous operations, then execute the query against the database<\/li>\n\n\n\n<li>Collect the results (which may or may not be present)<\/li>\n\n\n\n<li>Generate the response, again combining the question with the results obtained; this time, the response is provided in natural language<\/li>\n<\/ol>\n\n<div style=\"height:20px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<p>To keep the two mechanisms easy to manage, the two chatbots were kept separate, even graphically: one lets users ask questions and obtain answers <strong>based on documents<\/strong>, while the other is <strong>based on database data<\/strong>.<\/p>\n\n<div style=\"height:50px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n<h2 class=\"wp-block-heading has-text-color has-link-color has-large-font-size wp-elements-b0f9f62f548cd391d9c3c562ade396af\" style=\"color:#01bdff\"><span class=\"ez-toc-section\" id=\"Developer_Glossary\"><\/span>Developer Glossary<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n<div class=\"wp-block-group is-layout-constrained wp-block-group-is-layout-constrained\"><ol class=\"wp-block-footnotes\"><li id=\"5d1e1054-f2c0-4ba0-8371-2ba613582595\"><strong>Inference<\/strong> is the stage at which an artificial intelligence model is used to generate responses or make predictions after being trained. In practice, when you write a question or text, the model processes the input and produces an output (e.g., a response or sentence completion). Unlike training, which requires a lot of computing power and time, inference is when the model \u201cputs into practice\u201d what it has learned, typically in real time or near real time, to provide answers to users.  
<br> <a href=\"#5d1e1054-f2c0-4ba0-8371-2ba613582595-link\" aria-label=\"Jump to footnote reference 1\">\u21a9\ufe0e<\/a><\/li><li id=\"8b4b326a-4739-4a08-bc59-fbd919ccfddc\">A <strong>query<\/strong> is a request made to a database or computer system to obtain specific information, usually using a structured language such as SQL.<br> <a href=\"#8b4b326a-4739-4a08-bc59-fbd919ccfddc-link\" aria-label=\"Jump to footnote reference 2\">\u21a9\ufe0e<\/a><\/li><li id=\"19de7292-4bae-4bb4-93ce-706fa13f5bc5\"><strong>Structured Query Language<\/strong>, a language that allows queries and operations to be performed on a database.<br> <a href=\"#19de7292-4bae-4bb4-93ce-706fa13f5bc5-link\" aria-label=\"Jump to footnote reference 3\">\u21a9\ufe0e<\/a><\/li><\/ol><\/div>\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>LLMs and RAG systems are transforming the corporate use of AI: from customer support to document management. Discover how it works, its advantages and limitations for integrating generative AI. <\/p>\n","protected":false},"author":16,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_angie_page":false,"page_builder":"","footnotes":"[{\"content\":\"<strong>Inference<\\\/strong> is the stage at which an artificial intelligence model is used to generate responses or make predictions after being trained. In practice, when you write a question or text, the model processes the input and produces an output (e.g., a response or sentence completion). Unlike training, which requires a lot of computing power and time, inference is when the model \\u201cputs into practice\\u201d what it has learned, typically in real time or near real time, to provide answers to users.  
<br>\",\"id\":\"5d1e1054-f2c0-4ba0-8371-2ba613582595\"},{\"content\":\"A <strong>query<\\\/strong> is a request made to a database or computer system to obtain specific information, usually using a structured language such as SQL.<br>\",\"id\":\"8b4b326a-4739-4a08-bc59-fbd919ccfddc\"},{\"content\":\"<strong>Structured Query Language<\\\/strong>, a language that allows queries and operations to be performed on a database.<br>\",\"id\":\"19de7292-4bae-4bb4-93ce-706fa13f5bc5\"}]"},"categories":[159],"tags":[],"class_list":["post-33464","post","type-post","status-publish","format-standard","hentry","category-data-ai-en"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.4 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>LLM e RAG - AzzurroDigitale<\/title>\n<meta name=\"description\" content=\"Discover how LLMs and RAG are transforming generative AI: use cases, benefits, and challenges for companies looking to innovate and stay competitive\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"LLM e RAG - AzzurroDigitale\" \/>\n<meta property=\"og:description\" content=\"Discover how LLMs and RAG are transforming generative AI: use cases, benefits, and challenges for companies looking to innovate and stay competitive\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/\" \/>\n<meta property=\"og:site_name\" content=\"AzzurroDigitale\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/azzurrodigitale\" \/>\n<meta property=\"article:published_time\" 
content=\"2025-10-03T13:14:36+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2026-04-07T10:10:52+00:00\" \/>\n<meta name=\"author\" content=\"Anna Pacilio\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"Anna Pacilio\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"17 minutes\" \/>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"LLM e RAG - AzzurroDigitale","description":"Discover how LLMs and RAG are transforming generative AI: use cases, benefits, and challenges for companies looking to innovate and stay competitive","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/","og_locale":"en_US","og_type":"article","og_title":"LLM e RAG - AzzurroDigitale","og_description":"Discover how LLMs and RAG are transforming generative AI: use cases, benefits, and challenges for companies looking to innovate and stay competitive","og_url":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/","og_site_name":"AzzurroDigitale","article_publisher":"https:\/\/www.facebook.com\/azzurrodigitale","article_published_time":"2025-10-03T13:14:36+00:00","article_modified_time":"2026-04-07T10:10:52+00:00","author":"Anna Pacilio","twitter_card":"summary_large_image","twitter_misc":{"Written by":"Anna Pacilio","Est. 
reading time":"17 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#article","isPartOf":{"@id":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/"},"author":{"name":"Anna Pacilio","@id":"https:\/\/www.azzurrodigitale.com\/en\/#\/schema\/person\/542145d269e5a30e09680494693a2a1a"},"headline":"LLMs and RAG \u2013 A Glimpse into the Future of Generative Artificial Intelligence","datePublished":"2025-10-03T13:14:36+00:00","dateModified":"2026-04-07T10:10:52+00:00","mainEntityOfPage":{"@id":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/"},"wordCount":3402,"publisher":{"@id":"https:\/\/www.azzurrodigitale.com\/en\/#organization"},"image":{"@id":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#primaryimage"},"thumbnailUrl":"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/Pasted-image-20250925181718-20250925-161718-1024x406.png","articleSection":["Data &amp; AI"],"inLanguage":"en-US"},{"@type":"WebPage","@id":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/","url":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/","name":"LLM e RAG - 
AzzurroDigitale","isPartOf":{"@id":"https:\/\/www.azzurrodigitale.com\/en\/#website"},"primaryImageOfPage":{"@id":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#primaryimage"},"image":{"@id":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#primaryimage"},"thumbnailUrl":"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/Pasted-image-20250925181718-20250925-161718-1024x406.png","datePublished":"2025-10-03T13:14:36+00:00","dateModified":"2026-04-07T10:10:52+00:00","description":"Discover how LLMs and RAG are transforming generative AI: use cases, benefits, and challenges for companies looking to innovate and stay competitive","breadcrumb":{"@id":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#primaryimage","url":"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/Pasted-image-20250925181718-20250925-161718-1024x406.png","contentUrl":"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2025\/10\/Pasted-image-20250925181718-20250925-161718-1024x406.png"},{"@type":"BreadcrumbList","@id":"https:\/\/www.azzurrodigitale.com\/en\/one-login-endless-possibilities-the-single-sign-on-revolution-2\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/www.azzurrodigitale.com\/en\/"},{"@type":"ListItem","position":2,"name":"LLMs and RAG \u2013 A Glimpse into the Future of Generative Artificial 
Intelligence"}]},{"@type":"WebSite","@id":"https:\/\/www.azzurrodigitale.com\/en\/#website","url":"https:\/\/www.azzurrodigitale.com\/en\/","name":"AzzurroDigitale","description":"","publisher":{"@id":"https:\/\/www.azzurrodigitale.com\/en\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.azzurrodigitale.com\/en\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/www.azzurrodigitale.com\/en\/#organization","name":"AzzurroDigitale","url":"https:\/\/www.azzurrodigitale.com\/en\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/www.azzurrodigitale.com\/en\/#\/schema\/logo\/image\/","url":"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2024\/10\/logo-azzurro-digitale.svg","contentUrl":"https:\/\/www.azzurrodigitale.com\/wp-content\/uploads\/2024\/10\/logo-azzurro-digitale.svg","width":503,"height":64,"caption":"AzzurroDigitale"},"image":{"@id":"https:\/\/www.azzurrodigitale.com\/en\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/azzurrodigitale","https:\/\/www.youtube.com\/channel\/UC1NqEqHgcztU_2GIVr9Turg","https:\/\/www.linkedin.com\/company\/azzurrodigitale"]},{"@type":"Person","@id":"https:\/\/www.azzurrodigitale.com\/en\/#\/schema\/person\/542145d269e5a30e09680494693a2a1a","name":"Anna Pacilio","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/secure.gravatar.com\/avatar\/3217298eaec896dca5bbb3c15c0369536cf0cd71934819e896a05831e39efaae?s=96&d=mm&r=g","url":"https:\/\/secure.gravatar.com\/avatar\/3217298eaec896dca5bbb3c15c0369536cf0cd71934819e896a05831e39efaae?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/3217298eaec896dca5bbb3c15c0369536cf0cd71934819e896a05831e39efaae?s=96&d=mm&r=g","caption":"Anna 
Pacilio"}}]}},"_links":{"self":[{"href":"https:\/\/www.azzurrodigitale.com\/en\/wp-json\/wp\/v2\/posts\/33464","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.azzurrodigitale.com\/en\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.azzurrodigitale.com\/en\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.azzurrodigitale.com\/en\/wp-json\/wp\/v2\/users\/16"}],"replies":[{"embeddable":true,"href":"https:\/\/www.azzurrodigitale.com\/en\/wp-json\/wp\/v2\/comments?post=33464"}],"version-history":[{"count":0,"href":"https:\/\/www.azzurrodigitale.com\/en\/wp-json\/wp\/v2\/posts\/33464\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.azzurrodigitale.com\/en\/wp-json\/wp\/v2\/media?parent=33464"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.azzurrodigitale.com\/en\/wp-json\/wp\/v2\/categories?post=33464"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.azzurrodigitale.com\/en\/wp-json\/wp\/v2\/tags?post=33464"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}