Context Management
pydeflate v2.2+ introduces a context management system that eliminates global state and enables advanced use cases like parallel processing, custom cache directories, and dependency injection.
Why Use Contexts?
The traditional approach uses global configuration:
from pydeflate import set_pydeflate_path, imf_gdp_deflate
# Global state - affects all subsequent operations
set_pydeflate_path("./data")
result = imf_gdp_deflate(df, base_year=2015, ...)
Limitations:
- Can't use different cache directories in the same script
- Difficult to manage logging verbosity per operation
- Global state leaks across tests and parallel workers
Context management solves these issues:
from pydeflate.context import pydeflate_session
from pydeflate import imf_gdp_deflate, wb_gdp_deflate
# Isolated configuration
with pydeflate_session(data_dir="./cache1") as ctx:
result1 = imf_gdp_deflate(df, base_year=2015, context=ctx, ...)
with pydeflate_session(data_dir="./cache2") as ctx:
result2 = wb_gdp_deflate(df, base_year=2020, context=ctx, ...)
Basic Usage
Session Context
Use pydeflate_session() for scoped configuration:
from pydeflate.context import pydeflate_session
from pydeflate import imf_gdp_deflate
import pandas as pd
data = {
'country': ['USA', 'GBR'],
'year': [2015, 2016],
'value': [1000, 1100]
}
df = pd.DataFrame(data)
# Use custom cache directory
with pydeflate_session(data_dir="./my_cache") as ctx:
result = imf_gdp_deflate(
data=df,
base_year=2015,
source_currency="USA",
target_currency="USA",
id_column="country",
value_column="value",
target_value_column="value_constant",
context=ctx # Pass context
)
print(result)
# Data cached in ./my_cache
Temporary Context
For testing or one-off operations:
from pydeflate.context import temporary_context
from pydeflate import wb_cpi_deflate
# Creates temporary directory, auto-cleaned on exit
with temporary_context() as ctx:
result = wb_cpi_deflate(
data=df,
base_year=2020,
context=ctx,
...
)
# Process result
# Temporary directory automatically deleted
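To confirm the cleanup, capture the cache path from the context and check it after the block exits. A minimal sketch, assuming the context exposes its directory as data_dir (the same attribute shown in the default-context example below):

from pathlib import Path
from pydeflate.context import temporary_context

with temporary_context() as ctx:
    cache_path = Path(ctx.data_dir)
    assert cache_path.exists()  # cache directory exists inside the block

# After exiting the block, the temporary directory is gone
assert not cache_path.exists()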
Configuration Options
Data Directory
Specify where deflator/exchange data is cached:
with pydeflate_session(data_dir="/app/data/pydeflate") as ctx:
result = imf_gdp_deflate(df, context=ctx, ...)
Logging Level
Control verbosity:
import logging
with pydeflate_session(log_level=logging.DEBUG) as ctx:
# Detailed debug logs
result = imf_gdp_deflate(df, context=ctx, ...)
with pydeflate_session(log_level=logging.WARNING) as ctx:
# Only warnings and errors
result = wb_gdp_deflate(df, context=ctx, ...)
Schema Validation
Enable data quality checks:
with pydeflate_session(enable_validation=True) as ctx:
# Schema validation enabled
result = imf_gdp_deflate(df, context=ctx, ...)
See Schema Validation for details.
Combined Configuration
import logging
with pydeflate_session(
data_dir="./cache",
log_level=logging.INFO,
enable_validation=True
) as ctx:
result = imf_gdp_deflate(df, context=ctx, ...)
Default Context
Set a default context for your entire application:
from pydeflate.context import PydeflateContext, set_default_context
from pydeflate import imf_gdp_deflate
import logging
# Create and configure context
ctx = PydeflateContext.create(
data_dir="/app/cache",
log_level=logging.INFO,
enable_validation=False
)
# Set as default
set_default_context(ctx)
# All subsequent operations use this context automatically
result1 = imf_gdp_deflate(df1, base_year=2015, ...)
result2 = wb_gdp_deflate(df2, base_year=2020, ...)
result3 = oecd_dac_deflate(df3, base_year=2018, ...)
Retrieving Default Context
from pydeflate.context import get_default_context
ctx = get_default_context()
print(f"Cache directory: {ctx.data_dir}")
print(f"Logging level: {ctx.log_level}")
Parallel Processing
Contexts are thread-safe, enabling parallel operations:
from pydeflate.context import pydeflate_session
from pydeflate import imf_gdp_deflate, wb_gdp_deflate
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
# Multiple datasets to process (df1 to df4 are DataFrames loaded elsewhere)
datasets = [df1, df2, df3, df4]
def process_dataset(df, index):
"""Process dataset with isolated context."""
with pydeflate_session(data_dir=f"./cache_{index}") as ctx:
return imf_gdp_deflate(
data=df,
base_year=2015,
source_currency="USA",
target_currency="USA",
context=ctx,
...
)
# Process in parallel
with ThreadPoolExecutor(max_workers=4) as executor:
futures = [
executor.submit(process_dataset, df, i)
for i, df in enumerate(datasets)
]
results = [f.result() for f in futures]
print(f"Processed {len(results)} datasets in parallel")
Shared Cache for Parallel Operations
To use a shared cache (with file locking):
from pydeflate.context import pydeflate_session
from pydeflate import imf_gdp_deflate
from concurrent.futures import ThreadPoolExecutor
def process_with_shared_cache(df, base_year):
"""Multiple threads can safely share same cache."""
with pydeflate_session(data_dir="./shared_cache") as ctx:
return imf_gdp_deflate(
data=df,
base_year=base_year,
context=ctx,
...
)
# datasets and base_years are parallel lists of DataFrames and target years
with ThreadPoolExecutor(max_workers=4) as executor:
    # All threads share ./shared_cache (file locking prevents conflicts)
futures = [
executor.submit(process_with_shared_cache, df, year)
for df, year in zip(datasets, base_years)
]
results = [f.result() for f in futures]
File Locking
pydeflate uses filelock for thread-safe cache operations. Multiple threads can safely read and write the same cache directory.
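The underlying pattern is easy to illustrate with filelock directly: a lock file guards the critical section, and only one thread or process holds it at a time. A minimal sketch of the same idea (the lock path and function are hypothetical, not pydeflate's internal code):

from filelock import FileLock

lock = FileLock("./shared_cache/pydeflate.lock")  # hypothetical lock file

def refresh_cache():
    # Only one thread/process enters at a time; others block until the lock is released
    with lock:
        ...  # download and write deflator data safely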
Environment-Specific Configuration
Use contexts to manage different environments:
from pydeflate.context import PydeflateContext, set_default_context
import os
import logging
# Load environment
environment = os.getenv("ENVIRONMENT", "development")
if environment == "production":
ctx = PydeflateContext.create(
data_dir="/var/lib/pydeflate",
log_level=logging.WARNING,
enable_validation=True # Strict validation in prod
)
elif environment == "development":
ctx = PydeflateContext.create(
data_dir="./dev_cache",
log_level=logging.DEBUG, # Verbose logs in dev
enable_validation=False
)
elif environment == "testing":
ctx = PydeflateContext.create(
data_dir="/tmp/pydeflate_test",
log_level=logging.ERROR,
enable_validation=True
)
set_default_context(ctx)
Testing with Contexts
Contexts make testing cleaner:
import pytest
from pydeflate.context import temporary_context
from pydeflate import imf_gdp_deflate
import pandas as pd
def test_deflation():
"""Test deflation without global state contamination."""
# Arrange
data = {
'country': ['USA'],
'year': [2015],
'value': [1000]
}
df = pd.DataFrame(data)
# Act
with temporary_context() as ctx:
result = imf_gdp_deflate(
data=df,
base_year=2015,
source_currency="USA",
target_currency="USA",
id_column="country",
value_column="value",
target_value_column="value_constant",
context=ctx
)
# Assert
assert 'value_constant' in result.columns
    assert result['value_constant'].iloc[0] == pytest.approx(1000)  # Base year, so the value is unchanged
# Temporary cache automatically cleaned up
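For repeated use, the same pattern fits naturally into a pytest fixture, so every test gets a fresh, isolated cache. A sketch (the fixture name is illustrative):

@pytest.fixture
def pydeflate_ctx():
    """Yield an isolated context; the temporary cache is cleaned up after each test."""
    with temporary_context() as ctx:
        yield ctx

def test_deflation_with_fixture(pydeflate_ctx):
    df = pd.DataFrame({'country': ['USA'], 'year': [2015], 'value': [1000]})
    result = imf_gdp_deflate(
        data=df,
        base_year=2015,
        source_currency="USA",
        target_currency="USA",
        id_column="country",
        value_column="value",
        target_value_column="value_constant",
        context=pydeflate_ctx
    )
    assert 'value_constant' in result.columns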
Migration from Global State
Before (v2.1 and earlier)
from pydeflate import set_pydeflate_path, imf_gdp_deflate
# Global configuration
set_pydeflate_path("./data")
# All operations use global state
result1 = imf_gdp_deflate(df1, ...)
result2 = wb_gdp_deflate(df2, ...)
After (v2.2+)
from pydeflate.context import PydeflateContext, pydeflate_session, set_default_context
from pydeflate import imf_gdp_deflate, wb_gdp_deflate
# Option 1: Set default context (backward compatible)
ctx = PydeflateContext.create(data_dir="./data")
set_default_context(ctx)
result1 = imf_gdp_deflate(df1, ...) # Uses default context
result2 = wb_gdp_deflate(df2, ...) # Uses default context
# Option 2: Explicit context (recommended for new code)
with pydeflate_session(data_dir="./data") as ctx:
result1 = imf_gdp_deflate(df1, context=ctx, ...)
result2 = wb_gdp_deflate(df2, context=ctx, ...)
Backward Compatibility
set_pydeflate_path() still works and internally creates a default context. Existing code doesn't need to change.
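Because the legacy call wires up a default context internally, you can confirm the effect with get_default_context(). A sketch, assuming the behavior described above:

from pydeflate import set_pydeflate_path
from pydeflate.context import get_default_context

set_pydeflate_path("./data")  # legacy API, still supported

ctx = get_default_context()
print(ctx.data_dir)  # the directory passed to the legacy call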
Complete Example
Production application with context management:
from pydeflate.context import PydeflateContext, set_default_context
from pydeflate import imf_gdp_deflate, wb_cpi_deflate
from pydeflate.exceptions import PydeflateError
import pandas as pd
import logging
import os
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Set up context based on environment
env = os.getenv("APP_ENV", "development")
if env == "production":
context = PydeflateContext.create(
data_dir="/var/lib/myapp/pydeflate",
log_level=logging.WARNING,
enable_validation=True
)
elif env == "testing":
context = PydeflateContext.create(
data_dir="/tmp/pydeflate_test",
log_level=logging.ERROR,
enable_validation=True
)
else: # development
context = PydeflateContext.create(
data_dir="./dev_cache",
log_level=logging.DEBUG,
enable_validation=False
)
set_default_context(context)
# Application code
def process_economic_data(gdp_df, cpi_df):
"""Process economic indicators."""
try:
# GDP deflation
gdp_result = imf_gdp_deflate(
data=gdp_df,
base_year=2015,
source_currency="LCU",
target_currency="USA",
id_column="country",
value_column="gdp",
target_value_column="gdp_usd_2015"
)
# CPI deflation
cpi_result = wb_cpi_deflate(
data=cpi_df,
base_year=2015,
source_currency="LCU",
target_currency="LCU",
id_column="country",
value_column="price",
target_value_column="price_2015"
)
return gdp_result, cpi_result
except PydeflateError as e:
logging.error(f"Deflation failed: {e}")
raise
# Run application
if __name__ == "__main__":
# Load data
gdp_df = pd.read_csv("gdp_data.csv")
cpi_df = pd.read_csv("cpi_data.csv")
# Process
gdp_result, cpi_result = process_economic_data(gdp_df, cpi_df)
# Save results
gdp_result.to_csv("gdp_constant.csv", index=False)
cpi_result.to_csv("cpi_constant.csv", index=False)
print(f"Processed {len(gdp_result)} GDP observations")
print(f"Processed {len(cpi_result)} CPI observations")
Next Steps
- Error Handling - Robust error handling with contexts
- Schema Validation - Enable validation in context
- Plugin System - Use custom sources with contexts