Example: Error Recovery Mechanisms
Overview
This example demonstrates the comprehensive error recovery mechanisms implemented in the AR Payment Reversal dashboard. The system provides multi-layered error handling, automatic retry logic, compensation patterns, graceful degradation, and user-friendly error reporting to keep operations reliable and data consistent even when individual steps fail.
Business Value
Robust error recovery is critical for financial operations:
- Data Integrity: Prevent partial or corrupted transactions
- Business Continuity: Keep operations running despite failures
- User Experience: Clear feedback and recovery options
- Compliance: Maintain audit trails even during failures
- Cost Reduction: Minimize manual intervention requirements
Implementation Architecture
Error Handler Service
Core error handling infrastructure:
// MepApps.Dash.Ar.Maint.PaymentReversal/Services/ErrorHandlerService.cs
public class ErrorHandlerService : IErrorHandlerService
{
private readonly ILogger<ErrorHandlerService> _logger;
private readonly IErrorRepository _errorRepository;
private readonly INotificationService _notificationService;
private readonly IRetryPolicyProvider _retryPolicyProvider;
public async Task<ErrorHandlingResult> HandleErrorAsync(
Exception exception,
ErrorContext context)
{
var result = new ErrorHandlingResult();
try
{
// Classify the error
var classification = ClassifyError(exception);
// Log with appropriate severity
LogError(exception, context, classification);
// Store error details
var errorId = await StoreErrorDetailsAsync(exception, context, classification);
result.ErrorId = errorId;
// Determine recovery strategy
var strategy = DetermineRecoveryStrategy(classification, context);
result.RecoveryStrategy = strategy;
// Execute recovery if applicable
if (strategy.CanRecover)
{
result.RecoveryResult = await ExecuteRecoveryAsync(strategy, context);
}
// Notify relevant parties
await NotifyStakeholdersAsync(exception, context, classification);
// Prepare user feedback
result.UserMessage = GenerateUserMessage(exception, classification);
result.TechnicalDetails = GenerateTechnicalDetails(exception, context);
result.Success = true;
return result;
}
catch (Exception handlingError)
{
// Last resort - log critical failure
_logger.LogCritical(handlingError,
"Critical failure in error handler. Original error: {OriginalError}",
exception.Message);
return new ErrorHandlingResult
{
Success = false,
UserMessage = "A critical error occurred. Please contact support.",
ErrorId = Guid.NewGuid().ToString()
};
}
}
private ErrorClassification ClassifyError(Exception exception)
{
return exception switch
{
// Business errors
ValidationException => new ErrorClassification
{
Category = ErrorCategory.Business,
Severity = ErrorSeverity.Warning,
IsTransient = false,
CanRetry = false,
RequiresUserAction = true
},
// SYSPRO errors
SysproException sysproEx => new ErrorClassification
{
Category = ErrorCategory.Integration,
Severity = DetermineSysproSeverity(sysproEx),
IsTransient = IsSysproTransient(sysproEx),
CanRetry = IsSysproRetryable(sysproEx),
RequiresUserAction = false
},
// Database errors
SqlException sqlEx => new ErrorClassification
{
Category = ErrorCategory.Database,
Severity = DetermineSqlSeverity(sqlEx),
IsTransient = IsSqlTransient(sqlEx),
CanRetry = sqlEx.Number != 2627, // Not duplicate key
RequiresUserAction = false
},
// Network errors
HttpRequestException or TaskCanceledException => new ErrorClassification
{
Category = ErrorCategory.Network,
Severity = ErrorSeverity.Error,
IsTransient = true,
CanRetry = true,
RequiresUserAction = false
},
// Default
_ => new ErrorClassification
{
Category = ErrorCategory.Unknown,
Severity = ErrorSeverity.Error,
IsTransient = false,
CanRetry = false,
RequiresUserAction = true
}
};
}
private bool IsSqlTransient(SqlException sqlEx)
{
// Transient SQL error numbers
var transientErrors = new[]
{
-2, // Client timeout
20, // Instance does not support encryption (transient on Azure SQL)
64, // Connection failed during login
233, // Connection initialization error
1205, // Deadlock victim
10053, // Transport-level error (connection aborted)
10054, // Transport-level error (connection reset)
10060, // Network timeout
40197, // Azure SQL service error
40501, // Azure SQL service busy
40613 // Azure SQL database unavailable
};
return transientErrors.Contains(sqlEx.Number);
}
}
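A minimal caller sketch follows, showing how an operation might hand its exception to the handler and surface the result. It is illustrative only: the ErrorContext property names (OperationName, User) are assumptions, not the dashboard's actual contract.
// Illustrative only: ErrorContext property names (OperationName, User) are assumptions.
public class ReversalOperationRunner
{
    private readonly IErrorHandlerService _errorHandler;

    public ReversalOperationRunner(IErrorHandlerService errorHandler)
    {
        _errorHandler = errorHandler;
    }

    public async Task<string> RunAsync(Func<Task> postReversal)
    {
        try
        {
            await postReversal();
            return "Reversal posted successfully";
        }
        catch (Exception ex)
        {
            var handling = await _errorHandler.HandleErrorAsync(ex, new ErrorContext
            {
                OperationName = "ArPaymentReversal", // assumed property
                User = Environment.UserName          // assumed property
            });

            // Surface the friendly message plus the stored error id for support lookups.
            return $"{handling.UserMessage} (error ref: {handling.ErrorId})";
        }
    }
}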
Retry Policy Implementation
Retry policies built on Polly, with exponential backoff and jitter, circuit breaking, and composite wrapping:
// MepApps.Dash.Ar.Maint.PaymentReversal/Services/RetryPolicyProvider.cs
public class RetryPolicyProvider : IRetryPolicyProvider
{
private readonly ILogger<RetryPolicyProvider> _logger;
public IAsyncPolicy<T> GetPolicy<T>(RetryContext context)
{
return context.PolicyType switch
{
RetryPolicyType.Exponential => GetExponentialBackoffPolicy<T>(context),
RetryPolicyType.Linear => GetLinearRetryPolicy<T>(context),
RetryPolicyType.Circuit => GetCircuitBreakerPolicy<T>(context),
RetryPolicyType.Composite => GetCompositePolicy<T>(context),
_ => Policy.NoOpAsync<T>()
};
}
private IAsyncPolicy<T> GetExponentialBackoffPolicy<T>(RetryContext context)
{
return Policy
.HandleResult<T>(r => !context.ResultPredicate(r))
.Or<Exception>(ex => context.ExceptionPredicate(ex))
.WaitAndRetryAsync(
retryCount: context.MaxRetries,
sleepDurationProvider: retryAttempt =>
TimeSpan.FromSeconds(Math.Pow(2, retryAttempt)) +
TimeSpan.FromMilliseconds(Random.Shared.Next(0, 1000)), // jitter (Random.Shared requires .NET 6+)
onRetryAsync: async (outcome, timespan, retryCount, ctx) =>
{
var exception = outcome.Exception;
var result = outcome.Result;
_logger.LogWarning(
"Retry {RetryCount}/{MaxRetries} after {Delay}ms. " +
"Reason: {Reason}",
retryCount,
context.MaxRetries,
timespan.TotalMilliseconds,
exception?.Message ?? "Result condition not met");
// Execute custom retry callback
if (context.OnRetry != null)
{
await context.OnRetry(retryCount, timespan);
}
// Update metrics
UpdateRetryMetrics(context, retryCount);
});
}
private IAsyncPolicy<T> GetCircuitBreakerPolicy<T>(RetryContext context)
{
return Policy
.HandleResult<T>(r => !context.ResultPredicate(r))
.Or<Exception>(ex => context.ExceptionPredicate(ex))
.CircuitBreakerAsync(
handledEventsAllowedBeforeBreaking: context.CircuitBreakerThreshold,
durationOfBreak: context.CircuitBreakerDuration,
onBreak: (outcome, duration) =>
{
_logger.LogError(
"Circuit breaker opened for {Duration}s. Reason: {Reason}",
duration.TotalSeconds,
outcome.Exception?.Message ?? "Threshold exceeded");
// Notify about circuit break
NotifyCircuitBreakerOpen(context, duration);
},
onReset: () =>
{
_logger.LogInformation("Circuit breaker reset");
NotifyCircuitBreakerReset(context);
},
onHalfOpen: () =>
{
_logger.LogInformation("Circuit breaker half-open, testing...");
});
}
private IAsyncPolicy<T> GetCompositePolicy<T>(RetryContext context)
{
// Combine retry with circuit breaker
var retry = GetExponentialBackoffPolicy<T>(context);
var circuitBreaker = GetCircuitBreakerPolicy<T>(context);
var timeout = Policy.TimeoutAsync<T>(context.Timeout);
return Policy.WrapAsync(retry, circuitBreaker, timeout);
}
}
public class RetryContext
{
public RetryPolicyType PolicyType { get; set; }
public int MaxRetries { get; set; } = 3;
public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(30);
public Func<Exception, bool> ExceptionPredicate { get; set; } = _ => true; // default: retry on any handled exception
public Func<object, bool> ResultPredicate { get; set; } = _ => true; // default: treat every result as success (no result-based retry)
public Func<int, TimeSpan, Task> OnRetry { get; set; }
public int CircuitBreakerThreshold { get; set; } = 5;
public TimeSpan CircuitBreakerDuration { get; set; } = TimeSpan.FromSeconds(30);
}
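A sketch of how a caller might configure and execute one of these policies for a SYSPRO post. The sysproClient, PostResult type, and reversal object are placeholders, not types defined by the dashboard; only RetryContext and the provider come from the code above.
// Placeholder names: sysproClient, PostResult, and reversal are illustrative only.
var retryContext = new RetryContext
{
    PolicyType = RetryPolicyType.Exponential,
    MaxRetries = 4,
    ExceptionPredicate = ex => ex is SysproException or TimeoutException,
    ResultPredicate = r => r is PostResult { Succeeded: true },
    OnRetry = (attempt, delay) =>
    {
        Console.WriteLine($"Attempt {attempt} failed; retrying in {delay.TotalSeconds:N1}s");
        return Task.CompletedTask;
    }
};

var policy = retryPolicyProvider.GetPolicy<PostResult>(retryContext);
var postResult = await policy.ExecuteAsync(() => sysproClient.PostReversalAsync(reversal));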
Compensation Pattern Implementation
Reversing partially completed operations:
// MepApps.Dash.Ar.Maint.PaymentReversal/Services/CompensationService.cs
public class CompensationService : ICompensationService
{
private readonly ILogger<CompensationService> _logger;
private readonly Stack<CompensationAction> _compensationStack = new();
public async Task<CompensationResult> ExecuteWithCompensationAsync<T>(
Func<Task<T>> operation,
CompensationContext context)
{
var result = new CompensationResult();
try
{
// Record initial state for potential rollback
var snapshot = await CreateSnapshotAsync(context);
// Execute main operation
result.OperationResult = await operation();
result.Success = true;
// Clear compensation stack on success
_compensationStack.Clear();
return result;
}
catch (Exception ex)
{
_logger.LogError(ex, "Operation failed, initiating compensation");
// Execute compensation actions in reverse order
result.CompensationResults = await ExecuteCompensationStackAsync();
result.Success = false;
result.Error = ex;
// Verify compensation success
if (result.CompensationResults.All(r => r.Success))
{
_logger.LogInformation("Compensation completed successfully");
result.CompensationSuccessful = true;
}
else
{
_logger.LogError("Compensation partially failed");
result.CompensationSuccessful = false;
// Store compensation failure for manual intervention
await StoreCompensationFailureAsync(context, result);
}
throw new CompensationException(
"Operation failed and was compensated",
ex,
result);
}
}
public void RegisterCompensation(CompensationAction action)
{
_compensationStack.Push(action);
_logger.LogDebug("Registered compensation action: {Action}",
action.Description);
}
private async Task<List<CompensationActionResult>> ExecuteCompensationStackAsync()
{
var results = new List<CompensationActionResult>();
while (_compensationStack.Count > 0)
{
var action = _compensationStack.Pop();
var result = new CompensationActionResult
{
Action = action,
StartTime = DateTime.Now
};
try
{
_logger.LogInformation("Executing compensation: {Description}",
action.Description);
await action.CompensateAsync();
result.Success = true;
result.EndTime = DateTime.Now;
_logger.LogInformation("Compensation successful: {Description}",
action.Description);
}
catch (Exception ex)
{
result.Success = false;
result.Error = ex;
result.EndTime = DateTime.Now;
_logger.LogError(ex, "Compensation failed: {Description}",
action.Description);
// Continue with other compensations despite failure
}
results.Add(result);
}
return results;
}
}
public class CompensationAction
{
public string Id { get; set; } = Guid.NewGuid().ToString();
public string Description { get; set; }
public Func<Task> CompensateAsync { get; set; }
public object State { get; set; }
public DateTime RegisteredAt { get; set; } = DateTime.Now;
}
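The sketch below shows how a reversal workflow might register an undo action immediately after each side effect succeeds, so a later failure unwinds them in reverse order. The SYSPRO and GL method names and the CompensationContext.Reference property are placeholders; only CompensationService and CompensationAction come from the code above. Remember that ExecuteWithCompensationAsync rethrows as CompensationException after compensation runs.
// Placeholder names throughout: sysproClient, glService, and CompensationContext.Reference
// are illustrative; only CompensationService and CompensationAction come from the code above.
var outcome = await compensationService.ExecuteWithCompensationAsync(async () =>
{
    var payment = await sysproClient.PostReversalPaymentAsync(reversal);
    compensationService.RegisterCompensation(new CompensationAction
    {
        Description = $"Delete posted payment {payment.Id}",
        CompensateAsync = () => sysproClient.DeletePaymentAsync(payment.Id)
    });

    var journal = await glService.PostJournalAsync(reversal);
    compensationService.RegisterCompensation(new CompensationAction
    {
        Description = $"Reverse GL journal {journal.Number}",
        CompensateAsync = () => glService.ReverseJournalAsync(journal.Number)
    });

    return journal;
},
new CompensationContext { Reference = reversal.Id });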
Graceful Degradation
Maintaining functionality during partial failures:
// MepApps.Dash.Ar.Maint.PaymentReversal/Services/GracefulDegradationService.cs
public class GracefulDegradationService
{
private readonly ILogger<GracefulDegradationService> _logger;
private readonly Dictionary<string, ServiceHealth> _serviceHealth = new();
public async Task<T> ExecuteWithDegradationAsync<T>(
string serviceName,
Func<Task<T>> primaryOperation,
Func<Task<T>> fallbackOperation = null,
DegradationOptions options = null)
{
options ??= DegradationOptions.Default;
// Check service health
var health = GetServiceHealth(serviceName);
if (health.IsDegraded && fallbackOperation != null)
{
_logger.LogWarning("Service {Service} is degraded, using fallback",
serviceName);
return await ExecuteFallbackAsync(fallbackOperation, options);
}
try
{
// Attempt primary operation
var result = await primaryOperation();
// Update health on success
RecordSuccess(serviceName);
return result;
}
catch (Exception ex)
{
// Record failure
RecordFailure(serviceName, ex);
// Check if should degrade
if (ShouldDegrade(serviceName, options))
{
MarkServiceDegraded(serviceName, options.DegradationDuration);
}
// Try fallback if available
if (fallbackOperation != null)
{
_logger.LogWarning(ex,
"Primary operation failed for {Service}, attempting fallback",
serviceName);
return await ExecuteFallbackAsync(fallbackOperation, options);
}
throw;
}
}
private async Task<T> ExecuteFallbackAsync<T>(
Func<Task<T>> fallbackOperation,
DegradationOptions options)
{
try
{
return await fallbackOperation();
}
catch (Exception fallbackEx)
{
_logger.LogError(fallbackEx, "Fallback operation also failed");
if (options.DefaultValue != null && options.DefaultValue is T defaultValue)
{
_logger.LogWarning("Returning default value");
return defaultValue;
}
throw new DegradationException(
"Both primary and fallback operations failed",
fallbackEx);
}
}
private bool ShouldDegrade(string serviceName, DegradationOptions options)
{
var health = _serviceHealth[serviceName];
// Check failure rate
var failureRate = health.GetFailureRate(options.FailureWindow);
return failureRate > options.FailureThreshold ||
health.ConsecutiveFailures > options.ConsecutiveFailureLimit;
}
}
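A wiring sketch for the degradation service: query SYSPRO first and fall back to a local read model when SYSPRO is unhealthy. The client and cache names are placeholders; the DegradationOptions property names follow the code above.
// sysproClient and balanceCache are placeholders for whatever data sources the dashboard uses.
var balances = await degradationService.ExecuteWithDegradationAsync(
    serviceName: "SysproCustomerBalances",
    primaryOperation: () => sysproClient.GetCustomerBalancesAsync(customerId),
    fallbackOperation: () => balanceCache.GetLastKnownBalancesAsync(customerId),
    options: new DegradationOptions
    {
        FailureThreshold = 0.5, // degrade once half the calls in the window fail
        ConsecutiveFailureLimit = 3,
        DegradationDuration = TimeSpan.FromMinutes(2)
    });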
Error Recovery UI
User interface for error handling and recovery:
<!-- Error Recovery Dialog -->
<Window x:Class="MepApps.Dash.Ar.Maint.PaymentReversal.Dialogs.ErrorRecoveryDialog"
Title="Error Recovery"
Width="600" Height="500"
WindowStartupLocation="CenterOwner">
<Grid Margin="10">
<Grid.RowDefinitions>
<RowDefinition Height="Auto"/>
<RowDefinition Height="Auto"/>
<RowDefinition Height="*"/>
<RowDefinition Height="Auto"/>
<RowDefinition Height="Auto"/>
</Grid.RowDefinitions>
<!-- Error Icon and Message -->
<Grid Grid.Row="0">
<Grid.ColumnDefinitions>
<ColumnDefinition Width="Auto"/>
<ColumnDefinition Width="*"/>
</Grid.ColumnDefinitions>
<Image Grid.Column="0"
Source="/Images/error.png"
Width="48" Height="48"
Margin="0,0,10,0"/>
<StackPanel Grid.Column="1">
<TextBlock Text="An Error Occurred"
FontSize="16"
FontWeight="Bold"/>
<TextBlock Text="{Binding ErrorMessage}"
TextWrapping="Wrap"
Margin="0,5,0,0"/>
</StackPanel>
</Grid>
<!-- Error Classification -->
<Border Grid.Row="1"
Background="LightYellow"
BorderBrush="Orange"
BorderThickness="1"
Padding="5"
Margin="0,10,0,0">
<Grid>
<Grid.ColumnDefinitions>
<ColumnDefinition Width="Auto"/>
<ColumnDefinition Width="*"/>
</Grid.ColumnDefinitions>
<Grid.RowDefinitions>
<RowDefinition Height="Auto"/>
<RowDefinition Height="Auto"/>
<RowDefinition Height="Auto"/>
</Grid.RowDefinitions>
<TextBlock Grid.Row="0" Grid.Column="0" Text="Error Type:" Margin="0,2,10,2"/>
<TextBlock Grid.Row="0" Grid.Column="1" Text="{Binding ErrorCategory}" Margin="0,2"/>
<TextBlock Grid.Row="1" Grid.Column="0" Text="Severity:" Margin="0,2,10,2"/>
<TextBlock Grid.Row="1" Grid.Column="1" Text="{Binding ErrorSeverity}" Margin="0,2">
<TextBlock.Style>
<Style TargetType="TextBlock">
<Style.Triggers>
<DataTrigger Binding="{Binding ErrorSeverity}" Value="Critical">
<Setter Property="Foreground" Value="Red"/>
<Setter Property="FontWeight" Value="Bold"/>
</DataTrigger>
<DataTrigger Binding="{Binding ErrorSeverity}" Value="Error">
<Setter Property="Foreground" Value="DarkRed"/>
</DataTrigger>
<DataTrigger Binding="{Binding ErrorSeverity}" Value="Warning">
<Setter Property="Foreground" Value="Orange"/>
</DataTrigger>
</Style.Triggers>
</Style>
</TextBlock.Style>
</TextBlock>
<TextBlock Grid.Row="2" Grid.Column="0" Text="Error ID:" Margin="0,2,10,2"/>
<TextBlock Grid.Row="2" Grid.Column="1" Text="{Binding ErrorId}" Margin="0,2"/>
</Grid>
</Border>
<!-- Recovery Options -->
<GroupBox Grid.Row="2" Header="Recovery Options" Margin="0,10,0,0">
<ScrollViewer VerticalScrollBarVisibility="Auto">
<StackPanel>
<!-- Automatic Recovery -->
<RadioButton GroupName="Recovery"
IsChecked="{Binding UseAutomaticRecovery}"
IsEnabled="{Binding CanAutoRecover}"
Margin="5">
<StackPanel>
<TextBlock Text="Automatic Recovery" FontWeight="Bold"/>
<TextBlock Text="{Binding AutoRecoveryDescription}"
Foreground="Gray"
TextWrapping="Wrap"
Margin="20,0,0,0"/>
</StackPanel>
</RadioButton>
<!-- Retry Operation -->
<RadioButton GroupName="Recovery"
IsChecked="{Binding UseRetry}"
IsEnabled="{Binding CanRetry}"
Margin="5">
<StackPanel>
<TextBlock Text="Retry Operation" FontWeight="Bold"/>
<TextBlock Text="Attempt the operation again with the same parameters"
Foreground="Gray"
Margin="20,0,0,0"/>
<StackPanel Orientation="Horizontal" Margin="20,5,0,0">
<Label Content="Retry attempts:"/>
<TextBox Text="{Binding RetryAttempts}" Width="50"/>
<Label Content="Delay (seconds):"/>
<TextBox Text="{Binding RetryDelay}" Width="50"/>
</StackPanel>
</StackPanel>
</RadioButton>
<!-- Manual Recovery -->
<RadioButton GroupName="Recovery"
IsChecked="{Binding UseManualRecovery}"
Margin="5">
<StackPanel>
<TextBlock Text="Manual Recovery" FontWeight="Bold"/>
<TextBlock Text="Manually correct the issue and continue"
Foreground="Gray"
Margin="20,0,0,0"/>
</StackPanel>
</RadioButton>
<!-- Skip and Continue -->
<RadioButton GroupName="Recovery"
IsChecked="{Binding UseSkip}"
IsEnabled="{Binding CanSkip}"
Margin="5">
<StackPanel>
<TextBlock Text="Skip and Continue" FontWeight="Bold"/>
<TextBlock Text="Skip this operation and continue with the next"
Foreground="Gray"
Margin="20,0,0,0"/>
<TextBlock Text="⚠ Data may be incomplete"
Foreground="Orange"
Margin="20,0,0,0"/>
</StackPanel>
</RadioButton>
<!-- Abort -->
<RadioButton GroupName="Recovery"
IsChecked="{Binding UseAbort}"
Margin="5">
<StackPanel>
<TextBlock Text="Abort Operation" FontWeight="Bold"/>
<TextBlock Text="Cancel the entire operation and rollback changes"
Foreground="Gray"
Margin="20,0,0,0"/>
</StackPanel>
</RadioButton>
</StackPanel>
</ScrollViewer>
</GroupBox>
<!-- Technical Details -->
<Expander Grid.Row="3" Header="Technical Details" Margin="0,10,0,0">
<TextBox Text="{Binding TechnicalDetails}"
IsReadOnly="True"
VerticalScrollBarVisibility="Auto"
HorizontalScrollBarVisibility="Auto"
FontFamily="Consolas"
Height="100"/>
</Expander>
<!-- Buttons -->
<StackPanel Grid.Row="4"
Orientation="Horizontal"
HorizontalAlignment="Right"
Margin="0,10,0,0">
<Button Content="Execute Recovery"
Command="{Binding ExecuteRecoveryCommand}"
IsDefault="True"
Width="120"
Margin="0,0,5,0"/>
<Button Content="Cancel"
IsCancel="True"
Width="80"/>
</StackPanel>
</Grid>
</Window>
Error Recovery ViewModel
Managing error recovery workflow:
// MepApps.Dash.Ar.Maint.PaymentReversal/ViewModels/ErrorRecoveryViewModel.cs
public class ErrorRecoveryViewModel : BaseViewModel
{
private readonly ILogger<ErrorRecoveryViewModel> _logger;
private readonly IErrorHandlerService _errorHandler;
private readonly IRetryPolicyProvider _retryPolicy;
private readonly ICompensationService _compensationService;
public async Task ExecuteRecoveryAsync()
{
try
{
RecoveryResult result = null;
if (UseAutomaticRecovery)
{
result = await ExecuteAutomaticRecoveryAsync();
}
else if (UseRetry)
{
result = await ExecuteRetryRecoveryAsync();
}
else if (UseManualRecovery)
{
result = await ExecuteManualRecoveryAsync();
}
else if (UseSkip)
{
result = await ExecuteSkipRecoveryAsync();
}
else if (UseAbort)
{
result = await ExecuteAbortRecoveryAsync();
}
if (result == null)
{
ShowError("No recovery option was selected");
return;
}
if (result.Success)
{
_logger.LogInformation("Recovery successful: {Strategy}", result.Strategy);
ShowSuccess("Recovery completed successfully");
DialogResult = true;
}
else
{
_logger.LogWarning("Recovery failed: {Reason}", result.FailureReason);
ShowError($"Recovery failed: {result.FailureReason}");
}
}
catch (Exception ex)
{
_logger.LogError(ex, "Error during recovery execution");
ShowError("An error occurred during recovery");
}
}
private async Task<RecoveryResult> ExecuteRetryRecoveryAsync()
{
var retryContext = new RetryContext
{
PolicyType = RetryPolicyType.Exponential,
MaxRetries = RetryAttempts,
Timeout = TimeSpan.FromSeconds(RetryDelay * RetryAttempts),
ExceptionPredicate = ex => ex.GetType() == _originalException.GetType()
};
var policy = _retryPolicy.GetPolicy<RecoveryResult>(retryContext);
return await policy.ExecuteAsync(async () =>
{
// Re-execute original operation
return await _originalOperation();
});
}
}
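How the dialog and ViewModel come together is not shown above; the sketch below is one plausible wiring. The ViewModel constructor signature and the decision to show the dialog when automatic recovery is unavailable are assumptions; only the bound property names (ErrorMessage, TechnicalDetails, ErrorId) come from the XAML above.
// Assumptions: the ViewModel constructor signature and the dialog wiring are illustrative;
// only the bound property names come from the dialog XAML above.
var handling = await _errorHandler.HandleErrorAsync(ex, context);

if (handling.RecoveryStrategy == null || !handling.RecoveryStrategy.CanRecover)
{
    var viewModel = new ErrorRecoveryViewModel(_errorHandler, _retryPolicy, _compensationService)
    {
        ErrorMessage = handling.UserMessage,
        TechnicalDetails = handling.TechnicalDetails,
        ErrorId = handling.ErrorId
    };

    var dialog = new ErrorRecoveryDialog
    {
        DataContext = viewModel,
        Owner = Application.Current.MainWindow
    };
    dialog.ShowDialog();
}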
Error Monitoring Dashboard
Real-time error monitoring and alerts:
public class ErrorMonitoringService
{
private readonly Subject<ErrorEvent> _errorStream = new();
public IObservable<ErrorStatistics> GetErrorStatistics(TimeSpan window)
{
return _errorStream
.Buffer(window)
.Select(errors => new ErrorStatistics
{
TotalErrors = errors.Count,
ErrorsByCategory = errors.GroupBy(e => e.Category)
.ToDictionary(g => g.Key, g => g.Count()),
ErrorsBySeverity = errors.GroupBy(e => e.Severity)
.ToDictionary(g => g.Key, g => g.Count()),
TopErrors = errors.GroupBy(e => e.Message)
.OrderByDescending(g => g.Count())
.Take(5)
.Select(g => (Message: g.Key, Count: g.Count())) // assumes TopErrors is List<(string Message, int Count)>
.ToList(),
ErrorRate = errors.Count / window.TotalSeconds,
RecoverySuccessRate = errors.Any(e => e.RecoveryAttempted)
? errors.Count(e => e.RecoverySuccessful) /
(double)errors.Count(e => e.RecoveryAttempted)
: 0 // no recoveries attempted in this window
});
}
}
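Consumers can subscribe to the statistics stream to drive dashboard tiles or alerts. A sketch with illustrative thresholds follows (requires System.Reactive); the logger is whatever ILogger the host already has.
// Thresholds are illustrative; dispose the subscription when the dashboard view is torn down.
var subscription = errorMonitoringService
    .GetErrorStatistics(TimeSpan.FromMinutes(5))
    .Where(stats => stats.TotalErrors > 0 &&
                    (stats.ErrorRate > 0.1 || stats.RecoverySuccessRate < 0.8))
    .Subscribe(stats =>
        logger.LogWarning(
            "Elevated error activity: {Count} errors, {Rate:N2}/s, recovery success {Recovery:P0}",
            stats.TotalErrors,
            stats.ErrorRate,
            stats.RecoverySuccessRate));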
Benefits
- Reliability: System remains stable despite failures
- User Experience: Clear feedback and recovery options
- Data Integrity: Compensation ensures consistency
- Operational Efficiency: Automatic recovery reduces manual work
- Observability: Comprehensive error tracking and monitoring
Summary
The error recovery mechanisms in the AR Payment Reversal dashboard provide a robust framework for handling failures at all levels. Through intelligent classification, automatic retry logic, compensation patterns, and clear user feedback, the system maintains reliability and data integrity even in challenging operational conditions. The multi-layered approach ensures that both transient and permanent failures are handled appropriately while maintaining a positive user experience.