"""Example: log a RAG application's answers and retrieved context to Tonic Validate.

Template script — the bare annotations (``project_name: str`` etc.) are
placeholders the user is expected to fill in with real values before running.
"""
import os  # to set the environment variables via python

# FIX: List/Dict are used in module-level annotations below, which Python
# evaluates at import time — without this import the script raises NameError.
from typing import Dict, List

# Credentials must be set before the tvallogging client is constructed.
os.environ["OPENAI_API_KEY"] = "put-your-openai-api-key-here"
os.environ["TONIC_VALIDATE_API_KEY"] = "put-your-tonic-validate-api-key-here"

from tvallogging.api import TonicValidateApi
from tvallogging.chat_objects import Benchmark

project_name: str  # name of your new project
benchmark_name: str  # name of your new benchmark

# list of dictionaries of the form
# {
#     "question": "question for the benchmark",
#     "answer": "reference answer to the question"
# }
question_with_answer_list: List[Dict[str, str]]

api = TonicValidateApi()

# Create the benchmark and a project scored against it.
benchmark = Benchmark.from_json_list(question_with_answer_list)
benchmark_id = api.new_benchmark(benchmark, benchmark_name)
project = api.new_project(benchmark_id, project_name)

# Model used to evaluate the RAG answers (not the model that produced them).
llm_evaluator = "gpt-4"
run = project.new_run(llm_evaluator)

for question_with_answer in run.benchmark.question_with_answer_list:
    question = question_with_answer.question

    llm_answer: str  # answer obtained from the RAG application
    retrieved_context_list: List[str]  # list of context retrieved by the RAG application

    # log the llm_answer and retrieved_context_list to Tonic Validate
    # in this step, the RAG metrics are calculated locally
    run.log(question_with_answer, llm_answer, retrieved_context_list)