Fast, efficient AI inference for your applications
```bash
pip install bhumi
```
```python
import asyncio
import os

from bhumi.base_client import BaseLLMClient, LLMConfig

async def main():
    config = LLMConfig(
        api_key=os.getenv("OPENAI_API_KEY"),
        model="openai/gpt-4"
    )

    client = BaseLLMClient(config)
    response = await client.completion([
        {"role": "user", "content": "Hello!"}
    ])

    print(response['text'])

if __name__ == "__main__":
    asyncio.run(main())
```
Model strings use a `provider/model_name` format, so switching providers is just a matter of changing the string:

- `openai/model_name`
- `anthropic/model_name`
- `gemini/model_name`
- `groq/model_name`
- `sambanova/model_name`
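For example, pointing the same client at Anthropic only requires a different model string and API key. A minimal sketch, where the model name is illustrative, not an endorsement of a specific version:

```python
import os

from bhumi.base_client import BaseLLMClient, LLMConfig

# Same client, different provider: only the model string and key change.
# "claude-3-5-sonnet-20240620" is an illustrative model name; use whatever
# model your Anthropic account has access to.
config = LLMConfig(
    api_key=os.getenv("ANTHROPIC_API_KEY"),
    model="anthropic/claude-3-5-sonnet-20240620"
)
client = BaseLLMClient(config)
```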
```python
async def get_weather(location: str, unit: str = "f") -> str:
    return f"The weather in {location} is 75°{unit}"

# Register the tool
client.register_tool(
    name="get_weather",
    func=get_weather,
    description="Get the current weather for a location",
    parameters={
        "type": "object",
        "properties": {
            "location": {"type": "string", "description": "The city and state"},
            "unit": {"type": "string", "enum": ["c", "f"]}
        },
        "required": ["location"]  # unit is optional since it has a default
    }
)
```
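Once registered, the tool is available on later calls. A minimal usage sketch, assuming `completion()` resolves tool calls automatically as in the quickstart above:

```python
# The model can decide to call get_weather and fold the
# tool's result into its reply
response = await client.completion([
    {"role": "user", "content": "What's the weather in Boston, MA?"}
])
print(response['text'])
```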
- Get real-time responses as they're generated
- Switch easily between different AI providers
- Add custom tools for enhanced functionality
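With streaming enabled (the default), you can either consume chunks as they arrive or await the fully assembled response: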
```python
import asyncio

from bhumi.base_client import BaseLLMClient, LLMConfig

async def main():
    config = LLMConfig(
        api_key="your-api-key",
        model="openai/gpt-4",
        stream=True  # Enable streaming (default)
    )

    client = BaseLLMClient(config)

    # Option 1: Process chunks as they arrive
    async for chunk in client.stream([
        {"role": "user", "content": "Write a story about a robot"}
    ]):
        print(chunk.text, end="", flush=True)

    # Option 2: Get the final response (streaming is handled internally)
    response = await client.completion([
        {"role": "user", "content": "Write a story about a robot"}
    ])
    print(response['text'])

if __name__ == "__main__":
    asyncio.run(main())
```
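If you only need the final text, disable streaming to get the complete response in a single call: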
```python
import asyncio

from bhumi.base_client import BaseLLMClient, LLMConfig

async def main():
    config = LLMConfig(
        api_key="your-api-key",
        model="openai/gpt-4",
        stream=False  # Disable streaming
    )

    client = BaseLLMClient(config)

    # Get the complete response at once
    response = await client.completion([
        {"role": "user", "content": "Write a story about a robot"}
    ])
    print(response['text'])

if __name__ == "__main__":
    asyncio.run(main())
```