diff --git a/dspy/evaluate/evaluate.py b/dspy/evaluate/evaluate.py
index 3a9fc9da8..39f980696 100644
--- a/dspy/evaluate/evaluate.py
+++ b/dspy/evaluate/evaluate.py
@@ -2,9 +2,9 @@
 import signal
 import sys
 import threading
+import traceback
 import types
 from typing import Any
-import traceback
 
 import pandas as pd
 import tqdm
@@ -69,11 +69,6 @@ def __init__(
         self.return_all_scores = return_all_scores
         self.return_outputs = return_outputs
         self.provide_traceback = provide_traceback
-        if "display" in _kwargs:
-            dspy.logger.warning(
-                "DeprecationWarning: 'display' has been deprecated. To see all information for debugging,"
-                " use 'dspy.set_log_level('debug')'. In the future this will raise an error.",
-            )
 
     def _execute_single_thread(self, wrapped_program, devset, display_progress):
         ncorrect = 0
diff --git a/dspy/teleprompt/copro_optimizer.py b/dspy/teleprompt/copro_optimizer.py
index 9a45f3251..49c693eb3 100644
--- a/dspy/teleprompt/copro_optimizer.py
+++ b/dspy/teleprompt/copro_optimizer.py
@@ -74,9 +74,6 @@ def __init__(
         self.prompt_model = prompt_model
         self.track_stats = track_stats
 
-        if "verbose" in _kwargs:
-            dspy.logger.warning("DeprecationWarning: 'verbose' has been deprecated. To see all information for debugging, use 'dspy.set_log_level('debug')'. In the future this will raise an error.")
-
     def _check_candidates_equal(self, candidate1, candidate2):
         for p1, p2 in zip(candidate1["program"].predictors(), candidate2["program"].predictors()):
             if self._get_signature(p1).instructions != self._get_signature(p2).instructions:
@@ -199,9 +196,11 @@ def compile(self, student, *, trainset, eval_kwargs):
             for p_i, (p_old, p_new) in enumerate(zip(module.predictors(), module_clone.predictors())):
                 candidates_ = latest_candidates[id(p_old)] # Use the most recently generated candidates for evaluation
                 if len(module.predictors()) > 1:
+                    # Unless our program has multiple predictors, in which case we need to reevaluate all prompts with
+                    # the new prompt(s) for the other predictor(s).
                     candidates_ = all_candidates[
                         id(p_old)
-                    ] # Unless our program has multiple predictors, in which case we need to reevaluate all prompts with the new prompt(s) for the other predictor(s)
+                    ]
 
                 # For each candidate
                 for c_i, c in enumerate(candidates_):
@@ -225,7 +224,8 @@ def compile(self, student, *, trainset, eval_kwargs):
                         dspy.logger.debug(f"Predictor {i+1}")
                         self._print_signature(predictor)
                     dspy.logger.info(
-                        f"At Depth {d+1}/{self.depth}, Evaluating Prompt Candidate #{c_i+1}/{len(candidates_)} for Predictor {p_i+1} of {len(module.predictors())}.",
+                        f"At Depth {d+1}/{self.depth}, Evaluating Prompt Candidate #{c_i+1}/{len(candidates_)} for "
+                        f"Predictor {p_i+1} of {len(module.predictors())}.",
                     )
                     score = evaluate(module_clone, devset=trainset, **eval_kwargs)
                     if self.prompt_model:
@@ -270,7 +270,8 @@ def compile(self, student, *, trainset, eval_kwargs):
                 self._set_signature(p_new, updated_signature)
 
                 dspy.logger.debug(
-                    f"Updating Predictor {id(p_old)} to:\ni: {best_candidate['instruction']}\np: {best_candidate['prefix']}",
+                    f"Updating Predictor {id(p_old)} to:\ni: {best_candidate['instruction']}\n"
+                    f"p: {best_candidate['prefix']}",
                 )
                 dspy.logger.debug("Full predictor with update: ")
                 for i, predictor in enumerate(module_clone.predictors()):
@@ -321,7 +322,9 @@ def compile(self, student, *, trainset, eval_kwargs):
             )(attempted_instructions=attempts)
 
             if self.prompt_model:
-                dspy.logger.debug(f"(self.prompt_model.inspect_history(n=1)) {self.prompt_model.inspect_history(n=1)}")
+                dspy.logger.debug(
+                    f"(self.prompt_model.inspect_history(n=1)) {self.prompt_model.inspect_history(n=1)}"
+                )
             # Get candidates for each predictor
             new_candidates[id(p_base)] = instr.completions
             all_candidates[id(p_base)].proposed_instruction.extend(instr.completions.proposed_instruction)