LLM Integration Module

Django-CFG includes a comprehensive LLM integration module that provides multi-provider support, intelligent caching, cost tracking, and seamless Django integration.

Overview

The Django LLM module provides:

  • Multi-provider support (OpenAI, OpenRouter)
  • Automatic cost calculation and tracking
  • Intelligent caching with TTL
  • Type-safe configuration with Pydantic 2
  • Token counting and usage analytics
  • JSON extraction utilities
  • Translation services with caching

Quick Start

Enable LLM Module

# config.py
from django_cfg import DjangoConfig

class MyConfig(DjangoConfig):
    # LLM API keys
    openai_api_key: str = env.openai_api_key          # From YAML config
    openrouter_api_key: str = "<from-yaml-config>"    # Set via environment/config.yaml

    # Optional: custom cache directory and TTL
    llm_cache_dir: str = "cache/llm"
    llm_cache_ttl: int = 3600  # 1 hour

Basic Usage

from pathlib import Path

from django_cfg.modules.django_llm.llm.client import LLMClient

# Initialize with API keys
client = LLMClient(
    apikey_openrouter="sk-or-v1-...",
    apikey_openai="sk-proj-...",
    cache_dir=Path("cache/llm"),
    cache_ttl=3600,
    max_cache_size=1000
)

# Chat completion
response = client.chat_completion(
    messages=[
        {"role": "user", "content": "Explain quantum computing"}
    ],
    model="openai/gpt-4o-mini"
)

print(response['content'])

LLM Client

Chat Completions

from django_cfg.modules.django_llm.llm.client import LLMClient

client = LLMClient()

# Basic chat completion
response = client.chat_completion(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is machine learning?"}
    ],
    model="openai/gpt-4o-mini",
    temperature=0.7,
    max_tokens=500
)

# Streaming response
for chunk in client.chat_completion_stream(
    messages=[{"role": "user", "content": "Tell me a story"}],
    model="openai/gpt-4o-mini"
):
    print(chunk, end='', flush=True)

# With function calling
functions = [
    {
        "name": "get_weather",
        "description": "Get current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string"}
            }
        }
    }
]

response = client.chat_completion(
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    model="openai/gpt-4o-mini",
    functions=functions,
    function_call="auto"
)

Embeddings

# Generate embeddings
embedding = client.generate_embedding(
    text="Sample text for embedding",
    model="text-embedding-ada-002"
)

# Batch embeddings
texts = ["Text 1", "Text 2", "Text 3"]
embeddings = client.generate_embeddings_batch(
    texts=texts,
    model="text-embedding-ada-002"
)

# Similarity search
def find_similar_documents(query_text, document_embeddings):
    query_embedding = client.generate_embedding(query_text)
    similarities = []

    for doc_id, doc_embedding in document_embeddings.items():
        similarity = cosine_similarity(query_embedding, doc_embedding)
        similarities.append((doc_id, similarity))

    return sorted(similarities, key=lambda x: x[1], reverse=True)
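
The cosine_similarity helper used above is not provided by the module. A minimal pure-Python version, assuming embeddings are plain lists of floats, could look like this:

import math

def cosine_similarity(a, b):
    """Cosine similarity between two equal-length float vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return dot / (norm_a * norm_b)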

💰 Cost Tracking

Automatic Cost Calculation

from django_cfg.modules.django_llm.llm.costs import calculate_chat_cost

# Calculate cost for a chat completion
# (models_cache is assumed to hold per-model pricing metadata, loaded elsewhere)
cost = calculate_chat_cost(
    model="openai/gpt-4o-mini",
    input_tokens=100,
    output_tokens=50,
    models_cache=models_cache
)

print(f"Cost: ${cost:.4f}")

# Estimate cost before the API call
from django_cfg.modules.django_llm.llm.tokenizer import count_tokens

messages = [
    {"role": "user", "content": "What is artificial intelligence?"}
]

input_tokens = count_tokens(messages, model="gpt-4o-mini")
estimated_cost = calculate_chat_cost(
    model="openai/gpt-4o-mini",
    input_tokens=input_tokens,
    output_tokens=100  # Estimated
)

print(f"Estimated cost: ${estimated_cost:.4f}")

Cost Monitoring

from datetime import datetime

class CostTracker:
    def __init__(self):
        self.total_cost = 0
        self.usage_log = []

    def track_usage(self, model, input_tokens, output_tokens, cost):
        self.total_cost += cost
        self.usage_log.append({
            'timestamp': datetime.now(),
            'model': model,
            'input_tokens': input_tokens,
            'output_tokens': output_tokens,
            'cost': cost
        })

    def get_daily_usage(self, date=None):
        if date is None:
            date = datetime.now().date()

        daily_usage = [
            entry for entry in self.usage_log
            if entry['timestamp'].date() == date
        ]

        return {
            'total_cost': sum(entry['cost'] for entry in daily_usage),
            'total_tokens': sum(
                entry['input_tokens'] + entry['output_tokens']
                for entry in daily_usage
            ),
            'requests': len(daily_usage)
        }
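
A possible wiring of the tracker around a completion call is sketched below; the usage field names (usage, prompt_tokens, completion_tokens, cost) are assumptions for illustration, so adapt them to the response structure your client actually returns:

tracker = CostTracker()

response = client.chat_completion(
    messages=[{"role": "user", "content": "Summarize Django signals"}],
    model="openai/gpt-4o-mini"
)

# Hypothetical response fields -- adjust to the real response structure
usage = response.get('usage', {})
tracker.track_usage(
    model="openai/gpt-4o-mini",
    input_tokens=usage.get('prompt_tokens', 0),
    output_tokens=usage.get('completion_tokens', 0),
    cost=usage.get('cost', 0.0)
)

print(tracker.get_daily_usage())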

🧠 Intelligent Caching

Cache Configuration

from pathlib import Path

from django_cfg.modules.django_llm.llm.cache import LLMCache

# Custom cache settings
cache = LLMCache(
    cache_dir=Path("cache/llm"),
    ttl=3600,      # 1 hour
    max_size=1000  # Max 1000 cached responses
)

# Cache management
cache_info = cache.get_cache_info()
print(f"Cache size: {cache_info['size']}")
print(f"Hit rate: {cache_info['hit_rate']:.2%}")

# Clear the entire cache
cache.clear_cache()

# Clear cached responses for a specific model
cache.clear_cache(model="gpt-4o-mini")

Cache Strategies

# Cache with a custom key
def cached_completion(prompt, model="gpt-4o-mini", use_cache=True):
    if use_cache:
        cache_key = f"{model}:{hash(prompt)}"
        cached_response = cache.get(cache_key)
        if cached_response:
            return cached_response

    response = client.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        model=model
    )

    if use_cache:
        cache.set(cache_key, response, ttl=3600)

    return response

# Conditional caching based on cost
def smart_cached_completion(prompt, model="gpt-4o-mini"):
    # Estimate cost
    input_tokens = count_tokens([{"role": "user", "content": prompt}], model)
    estimated_cost = calculate_chat_cost(model, input_tokens, 100)

    # Use the cache for expensive requests
    use_cache = estimated_cost > 0.01  # Cache if cost > $0.01

    return cached_completion(prompt, model, use_cache)

Token Management

Token Counting

from django_cfg.modules.django_llm.llm.tokenizer import count_tokens, estimate_tokens

# Count tokens in messages
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is machine learning?"}
]

token_count = count_tokens(messages, model="gpt-4o-mini")
print(f"Token count: {token_count}")

# Estimate tokens for plain text
text = "This is a sample text for token estimation."
estimated = estimate_tokens(text)
print(f"Estimated tokens: {estimated}")

# Token budget management
def manage_token_budget(messages, max_tokens=4000, model="gpt-4o-mini"):
    current_tokens = count_tokens(messages, model)

    if current_tokens > max_tokens:
        # Truncate older messages
        while current_tokens > max_tokens and len(messages) > 1:
            messages.pop(1)  # Keep the system message, remove the oldest user/assistant turn
            current_tokens = count_tokens(messages, model)

    return messages
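
For example, a long conversation can be trimmed to the budget before sending (using the function defined above):

conversation = [{"role": "system", "content": "You are a helpful assistant."}]
conversation += [
    {"role": "user", "content": f"Question number {i}..."}
    for i in range(200)
]

trimmed = manage_token_budget(conversation, max_tokens=4000, model="gpt-4o-mini")
print(f"Messages kept: {len(trimmed)}")  # System message survives; oldest turns are dropped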

Usage Analytics

class TokenAnalytics:
    def __init__(self):
        self.usage_stats = {}

    def track_usage(self, model, input_tokens, output_tokens, cost):
        if model not in self.usage_stats:
            self.usage_stats[model] = {
                'total_input_tokens': 0,
                'total_output_tokens': 0,
                'total_cost': 0,
                'request_count': 0
            }

        stats = self.usage_stats[model]
        stats['total_input_tokens'] += input_tokens
        stats['total_output_tokens'] += output_tokens
        stats['total_cost'] += cost
        stats['request_count'] += 1

    def get_model_efficiency(self, model):
        if model not in self.usage_stats:
            return None

        stats = self.usage_stats[model]
        total_tokens = stats['total_input_tokens'] + stats['total_output_tokens']

        return {
            'cost_per_token': stats['total_cost'] / total_tokens if total_tokens > 0 else 0,
            'avg_tokens_per_request': total_tokens / stats['request_count'],
            'avg_cost_per_request': stats['total_cost'] / stats['request_count']
        }
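
Usage is straightforward; the token counts and costs below are purely illustrative:

analytics = TokenAnalytics()
analytics.track_usage("openai/gpt-4o-mini", input_tokens=120, output_tokens=80, cost=0.0004)
analytics.track_usage("openai/gpt-4o-mini", input_tokens=300, output_tokens=150, cost=0.0011)

efficiency = analytics.get_model_efficiency("openai/gpt-4o-mini")
print(f"Avg cost per request: ${efficiency['avg_cost_per_request']:.4f}")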

JSON Extraction

Structured Data Extraction

from django_cfg.modules.django_llm.llm.extractor import JSONExtractor

extractor = JSONExtractor(client)

# Extract structured data
text = """
John Doe is a 30-year-old software engineer living in San Francisco.
He works at TechCorp and earns $120,000 per year.
"""

schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "profession": {"type": "string"},
        "location": {"type": "string"},
        "company": {"type": "string"},
        "salary": {"type": "integer"}
    }
}

result = extractor.extract_json(
    text=text,
    schema=schema,
    model="openai/gpt-4o-mini"
)

print(result)
# Output:
# {
#     "name": "John Doe",
#     "age": 30,
#     "profession": "software engineer",
#     "location": "San Francisco",
#     "company": "TechCorp",
#     "salary": 120000
# }

Batch Extraction

# Extract from multiple texts
texts = [
    "Alice Smith, 25, designer at CreativeCo",
    "Bob Johnson, 35, manager at BusinessInc",
    "Carol Brown, 28, developer at StartupXYZ"
]

schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "role": {"type": "string"},
        "company": {"type": "string"}
    }
}

results = extractor.extract_json_batch(
    texts=texts,
    schema=schema,
    model="openai/gpt-4o-mini"
)

for result in results:
    print(f"{result['name']}: {result['role']} at {result['company']}")

🌍 Translation Services

Multi-language Translation

from django_cfg.modules.django_llm.translator import Translator

translator = Translator(client)

# Basic translation
result = translator.translate(
    text="Hello, how are you?",
    target_language="Spanish",
    model="openai/gpt-4o-mini"
)

print(result['translated_text'])
# Output: "Hola, ¿cómo estás?"

# Batch translation
texts = [
    "Good morning",
    "Thank you",
    "Goodbye"
]

results = translator.translate_batch(
    texts=texts,
    target_language="French",
    model="openai/gpt-4o-mini"
)

for original, translated in zip(texts, results):
    print(f"{original} -> {translated['translated_text']}")

Translation with Context

# Translation with context for better accuracy
result = translator.translate(
    text="The bank is closed",
    target_language="Spanish",
    context="Financial institution",
    model="openai/gpt-4o-mini"
)

# vs. without context (might translate "bank" as a river bank)
result_no_context = translator.translate(
    text="The bank is closed",
    target_language="Spanish",
    model="openai/gpt-4o-mini"
)
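
To memoize translations across processes rather than in the client's local cache, Django's own cache framework can sit in front of the translator. The cached_translate helper below is a sketch, not part of the module:

import hashlib

from django.core.cache import cache as django_cache

def cached_translate(text, target_language, context=None, ttl=86400):
    """Memoize translations in Django's configured cache backend."""
    raw_key = f"{target_language}:{context or ''}:{text}"
    cache_key = "llm_translation:" + hashlib.sha256(raw_key.encode()).hexdigest()

    cached = django_cache.get(cache_key)
    if cached is not None:
        return cached

    kwargs = {"context": context} if context else {}
    result = translator.translate(
        text=text,
        target_language=target_language,
        model="openai/gpt-4o-mini",
        **kwargs
    )
    django_cache.set(cache_key, result, ttl)
    return result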

Real-World Applications

Content Generation

class ContentGenerator:
    def __init__(self):
        self.client = LLMClient()

    def generate_product_description(self, product_name, features, target_audience):
        prompt = f"""
        Generate a compelling product description for {product_name}.

        Features: {', '.join(features)}
        Target Audience: {target_audience}

        Make it engaging and highlight the key benefits.
        """

        response = self.client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model="openai/gpt-4o-mini",
            temperature=0.7
        )

        return response['content']

    def generate_blog_post(self, topic, keywords, word_count=800):
        prompt = f"""
        Write a {word_count}-word blog post about {topic}.

        Include these keywords naturally: {', '.join(keywords)}

        Structure:
        1. Engaging introduction
        2. Main content with subheadings
        3. Conclusion with call-to-action
        """

        response = self.client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model="openai/gpt-4o-mini",
            temperature=0.8
        )

        return response['content']
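
Calling it is a one-liner per method; the product data here is made up:

generator = ContentGenerator()

description = generator.generate_product_description(
    product_name="TrailFlask",
    features=["vacuum insulated", "leak-proof lid", "24h cold retention"],
    target_audience="hikers and campers"
)
print(description)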

Data Analysis

class DataAnalyzer:
    def __init__(self):
        self.client = LLMClient()
        self.extractor = JSONExtractor(self.client)

    def analyze_customer_feedback(self, feedback_text):
        schema = {
            "type": "object",
            "properties": {
                "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
                "confidence": {"type": "number", "minimum": 0, "maximum": 1},
                "key_topics": {"type": "array", "items": {"type": "string"}},
                "action_items": {"type": "array", "items": {"type": "string"}},
                "priority": {"type": "string", "enum": ["low", "medium", "high"]}
            }
        }

        return self.extractor.extract_json(
            text=feedback_text,
            schema=schema,
            model="openai/gpt-4o-mini"
        )

    def summarize_data_trends(self, data_description):
        prompt = f"""
        Analyze the following data and provide insights:

        {data_description}

        Provide:
        1. Key trends
        2. Notable patterns
        3. Recommendations
        4. Potential concerns
        """

        response = self.client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model="openai/gpt-4o-mini"
        )

        return response['content']

Customer Support

class AICustomerSupport:
    def __init__(self):
        self.client = LLMClient()
        self.extractor = JSONExtractor(self.client)  # Needed by classify_ticket below

    def generate_response(self, customer_message, context=None):
        system_prompt = """
        You are a helpful customer support agent.
        Be polite, professional, and provide accurate information.
        If you don't know something, say so and offer to escalate to a human agent.
        """

        messages = [{"role": "system", "content": system_prompt}]

        if context:
            messages.append({
                "role": "system",
                "content": f"Context: {context}"
            })

        messages.append({
            "role": "user",
            "content": customer_message
        })

        response = self.client.chat_completion(
            messages=messages,
            model="openai/gpt-4o-mini",
            temperature=0.3  # Lower temperature for consistent responses
        )

        return response['content']

    def classify_ticket(self, ticket_content):
        schema = {
            "type": "object",
            "properties": {
                "category": {
                    "type": "string",
                    "enum": ["billing", "technical", "account", "general"]
                },
                "urgency": {
                    "type": "string",
                    "enum": ["low", "medium", "high", "critical"]
                },
                "requires_human": {"type": "boolean"},
                "suggested_response": {"type": "string"}
            }
        }

        return self.extractor.extract_json(
            text=ticket_content,
            schema=schema,
            model="openai/gpt-4o-mini"
        )
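
Exposing the support agent through a plain Django view might look like the sketch below (URL wiring is omitted and the JSON field names are arbitrary):

import json

from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
from django.views.decorators.http import require_POST

support = AICustomerSupport()

@csrf_exempt  # For illustration only; handle CSRF properly in production
@require_POST
def support_reply(request):
    payload = json.loads(request.body)
    answer = support.generate_response(
        customer_message=payload["message"],
        context=payload.get("context")
    )
    return JsonResponse({"reply": answer})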

Performance Monitoring

Client Information

# Get comprehensive client info
client_info = client.get_client_info()

print(f"Cache directory: {client_info['cache_directory']}")
print(f"Cache size: {client_info['cache_info']['size']}")
print(f"API keys configured: {client_info['api_keys']}")
print(f"Available models: {len(client_info['available_models'])}")

Usage Monitoring

from datetime import datetime, timedelta

class LLMMonitor:
    def __init__(self):
        self.usage_log = []

    def log_request(self, model, input_tokens, output_tokens, cost, response_time):
        self.usage_log.append({
            'timestamp': datetime.now(),
            'model': model,
            'input_tokens': input_tokens,
            'output_tokens': output_tokens,
            'cost': cost,
            'response_time': response_time
        })

    def get_usage_report(self, days=7):
        cutoff = datetime.now() - timedelta(days=days)
        recent_usage = [
            entry for entry in self.usage_log
            if entry['timestamp'] > cutoff
        ]

        if not recent_usage:
            return {}

        total_cost = sum(entry['cost'] for entry in recent_usage)
        total_tokens = sum(
            entry['input_tokens'] + entry['output_tokens']
            for entry in recent_usage
        )
        avg_response_time = sum(
            entry['response_time'] for entry in recent_usage
        ) / len(recent_usage)

        return {
            'total_requests': len(recent_usage),
            'total_cost': total_cost,
            'total_tokens': total_tokens,
            'avg_response_time': avg_response_time,
            'cost_per_token': total_cost / total_tokens if total_tokens > 0 else 0
        }
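
One way to feed the monitor automatically is a thin wrapper around chat_completion; as before, the usage field names are assumptions to adapt:

import time

monitor = LLMMonitor()

def monitored_completion(messages, model="openai/gpt-4o-mini", **kwargs):
    """Call the client and record timing plus (assumed) usage metadata."""
    start = time.perf_counter()
    response = client.chat_completion(messages=messages, model=model, **kwargs)
    elapsed = time.perf_counter() - start

    usage = response.get('usage', {})  # Hypothetical field -- adapt as needed
    monitor.log_request(
        model=model,
        input_tokens=usage.get('prompt_tokens', 0),
        output_tokens=usage.get('completion_tokens', 0),
        cost=usage.get('cost', 0.0),
        response_time=elapsed
    )
    return response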

The LLM module provides comprehensive AI integration for your Django applications! 🤖

TAGS: llm, ai, openai, chat-completion, embeddings, cost-tracking, caching
DEPENDS_ON: [configuration, caching, api-keys]
USED_BY: [agents, knowbase, content-generation, customer-support]