##############################################
Running 'f2c34e8d-b613-4f25-ab1e-943c5fb8837a'
##############################################
pending............
running......................................................
completed
Training of 'f2c34e8d-b613-4f25-ab1e-943c5fb8837a' finished successfully.
{'entity': {'hardware_spec': {'id': 'a6c4923b-b8e4-444c-9f43-8a7ec3020110',
'name': 'L'},
'input_data_references': [{'connection': {'id': '5841723d-848f-440a-82b0-b6ad59d983ec'},
'location': {'bucket': 'autoai-rag-with-extraction-experiment',
'file_name': 'granite_code_models_paper.md'},
'type': 'connection_asset'}],
'parameters': {'constraints': {'max_number_of_rag_patterns': 5},
'optimization': {'metrics': ['answer_correctness']},
'output_logs': True},
'results': [{'context': {'iteration': 0,
'max_combinations': 240,
'rag_pattern': {'composition_steps': ['model_selection',
'chunking',
'embeddings',
'retrieval',
'generation'],
'duration_seconds': 17,
'location': {'evaluation_results': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern1/evaluation_results.json',
'indexing_notebook': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern1/indexing_inference_notebook.ipynb',
'inference_notebook': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern1/indexing_inference_notebook.ipynb',
'inference_service_code': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern1/inference_ai_service.gz',
'inference_service_metadata': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern1/inference_service_metadata.json'},
'name': 'Pattern1',
'settings': {'chunking': {'chunk_overlap': 256,
'chunk_size': 1024,
'method': 'recursive'},
'embeddings': {'model_id': 'intfloat/multilingual-e5-large',
'truncate_input_tokens': 512,
'truncate_strategy': 'left'},
'generation': {'context_template_text': '[Document]\n{document}\n[End]',
'model_id': 'ibm/granite-3-8b-instruct',
'parameters': {'decoding_method': 'greedy',
'max_new_tokens': 1000,
'max_sequence_length': 131072,
'min_new_tokens': 1},
'prompt_template_text': '<|system|>\nYou are Granite Chat, an AI language model developed by IBM. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.<|user|>\nYou are an AI language model designed to function as a specialized Retrieval Augmented Generation (RAG) assistant. When generating responses, prioritize correctness, i.e., ensure that your response is grounded in context and user query. Always make sure that your response is relevant to the question. \nAnswer Length: detailed\n{reference_documents}\nRespond exclusively in the language of the question, regardless of any other language used in the provided context. Ensure that your entire response is in the same language as the question.\n{question} \n\n<|assistant|>',
'word_to_token_ratio': 2.573},
'retrieval': {'method': 'window',
'number_of_chunks': 5,
'window_size': 2},
'vector_store': {'datasource_type': 'chroma',
'distance_metric': 'cosine',
'index_name': 'autoai_rag_f2c34e8d_20250626153459',
'operation': 'upsert',
'schema': {'fields': [{'description': 'text field',
'name': 'text',
'role': 'text',
'type': 'string'},
{'description': 'document name field',
'name': 'document_id',
'role': 'document_name',
'type': 'string'},
{'description': 'chunk starting token position in the source document',
'name': 'start_index',
'role': 'start_index',
'type': 'number'},
{'description': 'chunk number per document',
'name': 'sequence_number',
'role': 'sequence_number',
'type': 'number'},
{'description': 'vector embeddings',
'name': 'vector',
'role': 'vector_embeddings',
'type': 'array'}],
'id': 'autoai_rag_1.0',
'name': 'Document schema using open-source loaders',
'type': 'struct'}}},
'settings_importance': {'chunking': [{'importance': 0.125,
'parameter': 'chunk_size'},
{'importance': 0.125, 'parameter': 'chunk_overlap'}],
'embeddings': [{'importance': 0.125, 'parameter': 'embedding_model'}],
'generation': [{'importance': 0.125, 'parameter': 'foundation_model'}],
'retrieval': [{'importance': 0.125, 'parameter': 'retrieval_method'},
{'importance': 0.125, 'parameter': 'window_size'},
{'importance': 0.125, 'parameter': 'number_of_chunks'}]}},
'software_spec': {'name': 'autoai-rag_rt24.1-py3.11'}},
'metrics': {'test_data': [{'ci_high': 1.0,
'ci_low': 0.6578,
'mean': 0.7813,
'metric_name': 'answer_correctness'},
{'ci_high': 0.8336,
'ci_low': 0.4882,
'mean': 0.7,
'metric_name': 'faithfulness'},
{'mean': 1.0, 'metric_name': 'context_correctness'}]}},
{'context': {'iteration': 1,
'max_combinations': 240,
'rag_pattern': {'composition_steps': ['model_selection',
'chunking',
'embeddings',
'retrieval',
'generation'],
'duration_seconds': 11,
'location': {'evaluation_results': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern2/evaluation_results.json',
'indexing_notebook': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern2/indexing_inference_notebook.ipynb',
'inference_notebook': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern2/indexing_inference_notebook.ipynb',
'inference_service_code': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern2/inference_ai_service.gz',
'inference_service_metadata': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern2/inference_service_metadata.json'},
'name': 'Pattern2',
'settings': {'chunking': {'chunk_overlap': 256,
'chunk_size': 1024,
'method': 'recursive'},
'embeddings': {'model_id': 'intfloat/multilingual-e5-large',
'truncate_input_tokens': 512,
'truncate_strategy': 'left'},
'generation': {'context_template_text': '[document]: {document}\n',
'model_id': 'meta-llama/llama-3-3-70b-instruct',
'parameters': {'decoding_method': 'greedy',
'max_new_tokens': 1000,
'max_sequence_length': 131072,
'min_new_tokens': 1},
'prompt_template_text': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don’t know the answer to a question, please don’t share false information.\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n{reference_documents}\n[conversation]: {question}. Answer with no more than 150 words. If you cannot base your answer on the given document, please state that you do not have an answer. Respond exclusively in the language of the question, regardless of any other language used in the provided context. Ensure that your entire response is in the same language as the question.\n<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>',
'word_to_token_ratio': 2.1967},
'retrieval': {'method': 'window',
'number_of_chunks': 3,
'window_size': 4},
'vector_store': {'datasource_type': 'chroma',
'distance_metric': 'cosine',
'index_name': 'autoai_rag_f2c34e8d_20250626153459',
'operation': 'upsert',
'schema': {'fields': [{'description': 'text field',
'name': 'text',
'role': 'text',
'type': 'string'},
{'description': 'document name field',
'name': 'document_id',
'role': 'document_name',
'type': 'string'},
{'description': 'chunk starting token position in the source document',
'name': 'start_index',
'role': 'start_index',
'type': 'number'},
{'description': 'chunk number per document',
'name': 'sequence_number',
'role': 'sequence_number',
'type': 'number'},
{'description': 'vector embeddings',
'name': 'vector',
'role': 'vector_embeddings',
'type': 'array'}],
'id': 'autoai_rag_1.0',
'name': 'Document schema using open-source loaders',
'type': 'struct'}}},
'settings_importance': {'chunking': [{'importance': 0.0,
'parameter': 'chunk_size'},
{'importance': 0.0, 'parameter': 'chunk_overlap'}],
'embeddings': [{'importance': 0.0, 'parameter': 'embedding_model'}],
'generation': [{'importance': 0.5283019,
'parameter': 'foundation_model'}],
'retrieval': [{'importance': 0.0, 'parameter': 'retrieval_method'},
{'importance': 0.24528302, 'parameter': 'window_size'},
{'importance': 0.2264151, 'parameter': 'number_of_chunks'}]}},
'software_spec': {'name': 'autoai-rag_rt24.1-py3.11'}},
'metrics': {'test_data': [{'ci_high': 0.7662,
'ci_low': 0.5556,
'mean': 0.6895,
'metric_name': 'answer_correctness'},
{'ci_high': 0.751,
'ci_low': 0.3462,
'mean': 0.6085,
'metric_name': 'faithfulness'},
{'mean': 1.0, 'metric_name': 'context_correctness'}]}},
{'context': {'iteration': 2,
'max_combinations': 240,
'rag_pattern': {'composition_steps': ['model_selection',
'chunking',
'embeddings',
'retrieval',
'generation'],
'duration_seconds': 5,
'location': {'evaluation_results': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern3/evaluation_results.json',
'indexing_notebook': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern3/indexing_inference_notebook.ipynb',
'inference_notebook': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern3/indexing_inference_notebook.ipynb',
'inference_service_code': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern3/inference_ai_service.gz',
'inference_service_metadata': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern3/inference_service_metadata.json'},
'name': 'Pattern3',
'settings': {'chunking': {'chunk_overlap': 128,
'chunk_size': 512,
'method': 'recursive'},
'embeddings': {'model_id': 'ibm/slate-125m-english-rtrvr-v2',
'truncate_input_tokens': 512,
'truncate_strategy': 'left'},
'generation': {'context_template_text': '[Document]\n{document}\n[End]',
'model_id': 'ibm/granite-3-8b-instruct',
'parameters': {'decoding_method': 'greedy',
'max_new_tokens': 1000,
'max_sequence_length': 131072,
'min_new_tokens': 1},
'prompt_template_text': '<|system|>\nYou are Granite Chat, an AI language model developed by IBM. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.<|user|>\nYou are an AI language model designed to function as a specialized Retrieval Augmented Generation (RAG) assistant. When generating responses, prioritize correctness, i.e., ensure that your response is grounded in context and user query. Always make sure that your response is relevant to the question. \nAnswer Length: detailed\n{reference_documents}\nRespond exclusively in the language of the question, regardless of any other language used in the provided context. Ensure that your entire response is in the same language as the question.\n{question} \n\n<|assistant|>',
'word_to_token_ratio': 2.573},
'retrieval': {'method': 'window',
'number_of_chunks': 3,
'window_size': 4},
'vector_store': {'datasource_type': 'chroma',
'distance_metric': 'cosine',
'index_name': 'autoai_rag_f2c34e8d_20250626153542',
'operation': 'upsert',
'schema': {'fields': [{'description': 'text field',
'name': 'text',
'role': 'text',
'type': 'string'},
{'description': 'document name field',
'name': 'document_id',
'role': 'document_name',
'type': 'string'},
{'description': 'chunk starting token position in the source document',
'name': 'start_index',
'role': 'start_index',
'type': 'number'},
{'description': 'chunk number per document',
'name': 'sequence_number',
'role': 'sequence_number',
'type': 'number'},
{'description': 'vector embeddings',
'name': 'vector',
'role': 'vector_embeddings',
'type': 'array'}],
'id': 'autoai_rag_1.0',
'name': 'Document schema using open-source loaders',
'type': 'struct'}}},
'settings_importance': {'chunking': [{'importance': 0.093277715,
'parameter': 'chunk_size'},
{'importance': 0.046638858, 'parameter': 'chunk_overlap'}],
'embeddings': [{'importance': 0.20731471,
'parameter': 'embedding_model'}],
'generation': [{'importance': 0.459491,
'parameter': 'foundation_model'}],
'retrieval': [{'importance': 0.0, 'parameter': 'retrieval_method'},
{'importance': 0.124416634, 'parameter': 'window_size'},
{'importance': 0.06886108, 'parameter': 'number_of_chunks'}]}},
'software_spec': {'name': 'autoai-rag_rt24.1-py3.11'}},
'metrics': {'test_data': [{'ci_high': 0.8074,
'ci_low': 0.6667,
'mean': 0.7569,
'metric_name': 'answer_correctness'},
{'ci_high': 0.7267,
'ci_low': 0.5238,
'mean': 0.652,
'metric_name': 'faithfulness'},
{'mean': 1.0, 'metric_name': 'context_correctness'}]}},
{'context': {'iteration': 3,
'max_combinations': 240,
'rag_pattern': {'composition_steps': ['model_selection',
'chunking',
'embeddings',
'retrieval',
'generation'],
'duration_seconds': 26,
'location': {'evaluation_results': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern4/evaluation_results.json',
'indexing_notebook': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern4/indexing_inference_notebook.ipynb',
'inference_notebook': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern4/indexing_inference_notebook.ipynb',
'inference_service_code': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern4/inference_ai_service.gz',
'inference_service_metadata': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern4/inference_service_metadata.json'},
'name': 'Pattern4',
'settings': {'chunking': {'chunk_overlap': 256,
'chunk_size': 512,
'method': 'recursive'},
'embeddings': {'model_id': 'intfloat/multilingual-e5-large',
'truncate_input_tokens': 512,
'truncate_strategy': 'left'},
'generation': {'context_template_text': '[Document]\n{document}\n[End]',
'model_id': 'ibm/granite-3-3-8b-instruct',
'parameters': {'decoding_method': 'greedy',
'max_new_tokens': 1000,
'max_sequence_length': 131072,
'min_new_tokens': 1},
'prompt_template_text': '<|system|>\nYou are Granite Chat, an AI language model developed by IBM. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.<|user|>\nYou are an AI language model designed to function as a specialized Retrieval Augmented Generation (RAG) assistant. When generating responses, prioritize correctness, i.e., ensure that your response is grounded in context and user query. Always make sure that your response is relevant to the question. \nAnswer Length: detailed\n{reference_documents}\nRespond exclusively in the language of the question, regardless of any other language used in the provided context. Ensure that your entire response is in the same language as the question.\n{question} \n\n<|assistant|>',
'word_to_token_ratio': 2.573},
'retrieval': {'method': 'window',
'number_of_chunks': 5,
'window_size': 1},
'vector_store': {'datasource_type': 'chroma',
'distance_metric': 'cosine',
'index_name': 'autoai_rag_f2c34e8d_20250626153555',
'operation': 'upsert',
'schema': {'fields': [{'description': 'text field',
'name': 'text',
'role': 'text',
'type': 'string'},
{'description': 'document name field',
'name': 'document_id',
'role': 'document_name',
'type': 'string'},
{'description': 'chunk starting token position in the source document',
'name': 'start_index',
'role': 'start_index',
'type': 'number'},
{'description': 'chunk number per document',
'name': 'sequence_number',
'role': 'sequence_number',
'type': 'number'},
{'description': 'vector embeddings',
'name': 'vector',
'role': 'vector_embeddings',
'type': 'array'}],
'id': 'autoai_rag_1.0',
'name': 'Document schema using open-source loaders',
'type': 'struct'}}},
'settings_importance': {'chunking': [{'importance': 0.122112766,
'parameter': 'chunk_size'},
{'importance': 0.022782432, 'parameter': 'chunk_overlap'}],
'embeddings': [{'importance': 0.07525133,
'parameter': 'embedding_model'}],
'generation': [{'importance': 0.4554362,
'parameter': 'foundation_model'}],
'retrieval': [{'importance': 0.0, 'parameter': 'retrieval_method'},
{'importance': 0.22230415, 'parameter': 'window_size'},
{'importance': 0.10211313, 'parameter': 'number_of_chunks'}]}},
'software_spec': {'name': 'autoai-rag_rt24.1-py3.11'}},
'metrics': {'test_data': [{'ci_high': 0.5455,
'ci_low': 0.0,
'mean': 0.1818,
'metric_name': 'answer_correctness'},
{'ci_high': 0.0881,
'ci_low': 0.0,
'mean': 0.0294,
'metric_name': 'faithfulness'},
{'mean': 1.0, 'metric_name': 'context_correctness'}]}},
{'context': {'iteration': 4,
'max_combinations': 240,
'rag_pattern': {'composition_steps': ['model_selection',
'chunking',
'embeddings',
'retrieval',
'generation'],
'duration_seconds': 14,
'location': {'evaluation_results': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern5/evaluation_results.json',
'indexing_notebook': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern5/indexing_inference_notebook.ipynb',
'inference_notebook': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern5/indexing_inference_notebook.ipynb',
'inference_service_code': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern5/inference_ai_service.gz',
'inference_service_metadata': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/Pattern5/inference_service_metadata.json'},
'name': 'Pattern5',
'settings': {'chunking': {'chunk_overlap': 256,
'chunk_size': 1024,
'method': 'recursive'},
'embeddings': {'model_id': 'ibm/slate-125m-english-rtrvr-v2',
'truncate_input_tokens': 512,
'truncate_strategy': 'left'},
'generation': {'context_template_text': '[Document]\n{document}\n[End]',
'model_id': 'ibm/granite-3-8b-instruct',
'parameters': {'decoding_method': 'greedy',
'max_new_tokens': 1000,
'max_sequence_length': 131072,
'min_new_tokens': 1},
'prompt_template_text': '<|system|>\nYou are Granite Chat, an AI language model developed by IBM. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.<|user|>\nYou are an AI language model designed to function as a specialized Retrieval Augmented Generation (RAG) assistant. When generating responses, prioritize correctness, i.e., ensure that your response is grounded in context and user query. Always make sure that your response is relevant to the question. \nAnswer Length: detailed\n{reference_documents}\nRespond exclusively in the language of the question, regardless of any other language used in the provided context. Ensure that your entire response is in the same language as the question.\n{question} \n\n<|assistant|>',
'word_to_token_ratio': 2.573},
'retrieval': {'method': 'window',
'number_of_chunks': 3,
'window_size': 4},
'vector_store': {'datasource_type': 'chroma',
'distance_metric': 'cosine',
'index_name': 'autoai_rag_f2c34e8d_20250626153627',
'operation': 'upsert',
'schema': {'fields': [{'description': 'text field',
'name': 'text',
'role': 'text',
'type': 'string'},
{'description': 'document name field',
'name': 'document_id',
'role': 'document_name',
'type': 'string'},
{'description': 'chunk starting token position in the source document',
'name': 'start_index',
'role': 'start_index',
'type': 'number'},
{'description': 'chunk number per document',
'name': 'sequence_number',
'role': 'sequence_number',
'type': 'number'},
{'description': 'vector embeddings',
'name': 'vector',
'role': 'vector_embeddings',
'type': 'array'}],
'id': 'autoai_rag_1.0',
'name': 'Document schema using open-source loaders',
'type': 'struct'}}},
'settings_importance': {'chunking': [{'importance': 0.0615634,
'parameter': 'chunk_size'},
{'importance': 0.009549737, 'parameter': 'chunk_overlap'}],
'embeddings': [{'importance': 0.06833898,
'parameter': 'embedding_model'}],
'generation': [{'importance': 0.4761837,
'parameter': 'foundation_model'}],
'retrieval': [{'importance': 0.0, 'parameter': 'retrieval_method'},
{'importance': 0.3240124, 'parameter': 'window_size'},
{'importance': 0.060351793, 'parameter': 'number_of_chunks'}]}},
'software_spec': {'name': 'autoai-rag_rt24.1-py3.11'}},
'metrics': {'test_data': [{'ci_high': 0.8182,
'ci_low': 0.6825,
'mean': 0.733,
'metric_name': 'answer_correctness'},
{'ci_high': 0.7949,
'ci_low': 0.5407,
'mean': 0.7084,
'metric_name': 'faithfulness'},
{'mean': 1.0, 'metric_name': 'context_correctness'}]}}],
'results_reference': {'location': {'path': 'default_autoai_rag_out',
'training': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a',
'training_status': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/training-status.json',
'training_log': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/output.log',
'assets_path': 'default_autoai_rag_out/f2c34e8d-b613-4f25-ab1e-943c5fb8837a/assets'},
'type': 'container'},
'status': {'completed_at': '2025-06-26T15:36:48.763Z',
'message': {'level': 'info',
'text': 'AAR019I: AutoAI execution completed.'},
'running_at': '2025-06-26T15:31:36.000Z',
'state': 'completed',
'step': 'generation'},
'test_data_references': [{'connection': {'id': '5841723d-848f-440a-82b0-b6ad59d983ec'},
'location': {'bucket': 'autoai-rag-with-extraction-experiment',
'file_name': 'benchmark.json'},
'type': 'connection_asset'}],
'timestamp': '2025-06-26T15:36:49.506Z'},
'metadata': {'created_at': '2025-06-26T15:29:46.362Z',
'description': 'AutoAI RAG experiment on documents generated by text extraction service',
'id': 'f2c34e8d-b613-4f25-ab1e-943c5fb8837a',
'modified_at': '2025-06-26T15:36:48.887Z',
'name': 'AutoAI RAG - Text Extraction service experiment',
'space_id': '9f44cc2b-b3d0-4472-824e-4941afb1617b'}}