Skip to content

Generate Multiple Candidate Responses

By generating multiple candidate responses in parallel and choosing the most common answer among them, we can get a more accurate answer. This is known as Self-Consistency 1

We can implement this using instructor as seen below.

import instructor
from pydantic import BaseModel, Field
from openai import AsyncOpenAI
import asyncio
from collections import Counter
from textwrap import dedent


class SelfConsistencyResponse(BaseModel):
    chain_of_thought: str = Field(
        description="reasoning behind the final correct answer"
    )
    correct_answer: int


client = instructor.from_openai(AsyncOpenAI())


async def generate_self_consistent_response(prompt: str):
    return await client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": """You are an intelligent question
                answering AI system that excels at answering
                user queries. Make sure to generate a
                comprehensive explanation of your thought
                process before providing the final answer""",
            },
            {"role": "user", "content": prompt},
        ],
        response_model=SelfConsistencyResponse,
        temperature=0.5,
    )


async def generate_self_consistent_responses(prompt: str, num_responses: int):
    coros = [
        generate_self_consistent_response(prompt)
        for _ in range(num_responses)
    ]
    responses = await asyncio.gather(*coros)
    return responses


if __name__ == "__main__":
    prompt = dedent(
        """
        Janet's ducks lay 16 eggs per day.
        She eats three for breakfast every
        morning and bakes muffins for her
        friends every day with four. She sells
        the remainder for $2 per egg. How
        much does she make every day?
        """
    )
    responses = asyncio.run(generate_self_consistent_responses(prompt, 5))
    answer_counts = Counter([
        response.correct_answer for response in responses
    ])
    most_common_answer, _ = answer_counts.most_common(1)[0]

    print(most_common_answer)
    #> 18

References

1: Self-Consistency Improves Chain Of Thought Reasoning In Language Models