"""Score a sample of reviews with an LLM and write the results to a dataset.

Reads rows from the ``reviews`` dataset, asks the configured LLM to classify
the sentiment of each row's ``text`` value, and writes the original row plus
the ``llm_sentiment`` and ``llm_explanation`` values to the
``reviews_sample_llm_scored`` dataset. Only the first ``SSIZE`` rows are
processed.
"""
import ast
import itertools
import json

import dataiku
from langchain_core.messages import HumanMessage, SystemMessage

LLM_ID = ""  # Fill with a valid LLM_ID
SSIZE = 10  # Number of input rows to score


def _parse_llm_response(raw):
    """Return the dict encoded in the LLM reply *raw*.

    The prompt asks for JSON, so JSON parsing is tried first;
    ``ast.literal_eval`` is kept as a fallback for replies formatted as a
    Python dict literal (e.g. single-quoted keys).

    Raises:
        ValueError: if no dict can be decoded from the reply.
    """
    # Accept either a plain string or a message-like object exposing `.content`.
    text = getattr(raw, "content", raw)
    if not isinstance(text, str):
        raise ValueError(f"Unexpected LLM response type: {type(text).__name__}")

    # Models often wrap the object in markdown fences or extra prose, so keep
    # only the outermost {...} span when there is one.
    start, end = text.find("{"), text.rfind("}")
    candidate = text[start:end + 1] if 0 <= start < end else text

    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        try:
            parsed = ast.literal_eval(candidate)
        except (ValueError, SyntaxError) as err:
            raise ValueError(
                f"LLM response is not a valid JSON object: {text!r}"
            ) from err

    if not isinstance(parsed, dict):
        raise ValueError(f"LLM response is not a JSON object: {text!r}")
    return parsed


client = dataiku.api_client()
project = client.get_default_project()
llm = project.get_llm(LLM_ID).as_langchain_llm()

input_dataset = dataiku.Dataset("reviews")

# Columns appended to the input schema to hold the LLM output.
new_cols = [
    {"type": "string", "name": "llm_sentiment"},
    {"type": "string", "name": "llm_explanation"},
]
output_schema = input_dataset.read_schema() + new_cols

output_dataset = dataiku.Dataset("reviews_sample_llm_scored")
output_dataset.write_schema(output_schema)

system_msg = """
You are an assistant that classifies reviews according to their sentiment. \
Respond in json format with the keys: llm_sentiment and llm_explanation. \
The value for llm_sentiment should only be either pos or neg without punctuation: pos if the review is positive, neg otherwise.\
The value for llm_explanation should be a very short explanation for the sentiment.
"""

with output_dataset.get_writer() as w:
    # islice caps the run at SSIZE rows (and yields nothing when SSIZE is 0).
    for r in itertools.islice(input_dataset.iter_rows(), SSIZE):
        messages = [
            SystemMessage(content=system_msg),
            HumanMessage(content=r.get("text")),
        ]
        llm_out = llm.invoke(messages)
        # LLM keys are merged last so they fill the two appended columns.
        w.write_row_dict({**dict(r), **_parse_llm_response(llm_out)})