Effective monitoring is crucial for maintaining healthy Java applications in production. This guide covers comprehensive monitoring strategies, tools, and best practices.
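Key areas covered: metrics collection with Micrometer, logging configuration and structured logging, health checks, distributed tracing, and alerting.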
/**
 * Spring configuration for metrics collection.
 *
 * <p>Exposes a composite {@link MeterRegistry} that fans out to a Prometheus registry and a JMX
 * registry, plus a {@link TimedAspect} bound to that registry.
 */
@Configuration
public class MetricsConfig {

    /**
     * Creates the application-wide meter registry.
     *
     * @return a composite registry containing one Prometheus and one JMX registry, both built
     *     from their default configurations
     */
    @Bean
    public MeterRegistry meterRegistry() {
        CompositeMeterRegistry composite = new CompositeMeterRegistry();

        // Prometheus backend
        PrometheusMeterRegistry prometheus = new PrometheusMeterRegistry(PrometheusConfig.DEFAULT);
        composite.add(prometheus);

        // JMX backend
        JmxMeterRegistry jmx = new JmxMeterRegistry(JmxConfig.DEFAULT, Clock.SYSTEM);
        composite.add(jmx);

        return composite;
    }

    /**
     * Creates the aspect that backs the {@code @Timed} annotation.
     *
     * @param registry the registry the aspect is constructed with
     * @return a new {@link TimedAspect}
     */
    @Bean
    public TimedAspect timedAspect(MeterRegistry registry) {
        TimedAspect aspect = new TimedAspect(registry);
        return aspect;
    }
}
/**
 * Example service showing the basic Micrometer meter types: timers, counters, gauges and
 * distribution summaries.
 */
@Service
public class MonitoredService {

    private final MeterRegistry registry;

    /**
     * Creates the service.
     *
     * @param registry the registry all meters are recorded against; must not be null
     */
    public MonitoredService(MeterRegistry registry) {
        this.registry = java.util.Objects.requireNonNull(registry, "registry");
    }

    /**
     * Runs the operation and times it.
     *
     * <p>The duration is recorded twice under different names: declaratively through
     * {@code @Timed} as {@code service.operation}, and programmatically through a
     * {@link Timer.Sample} as {@code operation.timer}. The sample is stopped in {@code finally}
     * so failed runs are timed as well.
     */
    @Timed(value = "service.operation",
            description = "Time taken to process operation")
    public void performOperation() {
        Timer.Sample sample = Timer.start(registry);
        try {
            // Business logic
        } finally {
            sample.stop(registry.timer("operation.timer"));
        }
    }

    /**
     * Demonstrates recording a counter, a gauge and a distribution summary.
     *
     * <p>NOTE(review): {@code queue} and {@code responseSize} are not declared anywhere in this
     * class as shown; they have to be supplied (for example as fields) before this compiles.
     */
    public void recordMetrics() {
        // Counter example
        Counter requests = registry.counter("requests.total");
        requests.increment();

        // Gauge example: sampled from the queue's current size whenever the gauge is read
        Gauge.builder("queue.size", queue, Queue::size)
                .description("Current queue size")
                .register(registry);

        // Distribution summary
        DistributionSummary summary = registry.summary("response.size");
        summary.record(responseSize);
    }
}
/**
 * Helper component for recording business, latency, error and host-resource metrics.
 */
@Component
public class CustomMetrics {

    private final MeterRegistry registry;

    /**
     * Latest value of every business gauge, keyed by operation and stored as raw double bits.
     * The holders are kept here so they stay strongly reachable for as long as the gauge exists
     * and so later calls can update the value an already-registered gauge reports.
     */
    private final java.util.concurrent.ConcurrentMap<String, java.util.concurrent.atomic.AtomicLong>
            businessValues = new java.util.concurrent.ConcurrentHashMap<>();

    /**
     * Creates the component.
     *
     * @param registry the registry all meters are recorded against; must not be null
     */
    public CustomMetrics(MeterRegistry registry) {
        this.registry = java.util.Objects.requireNonNull(registry, "registry");
    }

    /**
     * Publishes the current value of a business metric as the gauge
     * {@code business.metric.<operation>}.
     *
     * <p>The gauge is registered on the first call for an operation; subsequent calls only
     * update the value it reports.
     *
     * @param operation suffix of the gauge name
     * @param value the value the gauge should report from now on
     */
    public void recordBusinessMetric(String operation,
            double value) {
        businessValues
                .computeIfAbsent(operation, this::registerBusinessGauge)
                .set(Double.doubleToLongBits(value));
    }

    /** Registers the gauge for one operation and returns the holder it reads from. */
    private java.util.concurrent.atomic.AtomicLong registerBusinessGauge(String operation) {
        java.util.concurrent.atomic.AtomicLong holder =
                new java.util.concurrent.atomic.AtomicLong(Double.doubleToLongBits(Double.NaN));
        Gauge.builder("business.metric." + operation, holder,
                        h -> Double.longBitsToDouble(h.get()))
                .register(registry);
        return holder;
    }

    /**
     * Records the time elapsed since {@code startTime} on the timer
     * {@code operation.latency.<operation>}.
     *
     * @param operation suffix of the timer name
     * @param startTime a value previously obtained from {@link System#nanoTime()}
     */
    public void recordLatency(String operation,
            long startTime) {
        long duration = System.nanoTime() - startTime;
        registry.timer("operation.latency." + operation)
                .record(duration, TimeUnit.NANOSECONDS);
    }

    /**
     * Increments the {@code errors.total} counter, tagged with the operation and the simple
     * class name of the error.
     *
     * @param operation value of the {@code operation} tag
     * @param error the error that occurred; its class supplies the {@code type} tag
     */
    public void recordError(String operation,
            Throwable error) {
        Counter.builder("errors.total")
                .tag("operation", operation)
                .tag("type", error.getClass().getSimpleName())
                .register(registry)
                .increment();
    }

    /**
     * Registers gauges for process CPU load ({@code system.cpu.usage}) and physical memory in
     * use ({@code system.memory.usage}).
     *
     * <p>Both values come from the {@code com.sun.management} extension of the operating system
     * MXBean. When the running JVM does not provide that extension, nothing is registered.
     */
    public void recordResourceUsage() {
        OperatingSystemMXBean osBean =
                ManagementFactory.getOperatingSystemMXBean();
        if (!(osBean instanceof com.sun.management.OperatingSystemMXBean)) {
            // Extended OS metrics are unavailable; skip instead of registering gauges whose
            // value function would fail with a ClassCastException on every read.
            return;
        }
        com.sun.management.OperatingSystemMXBean extendedBean =
                (com.sun.management.OperatingSystemMXBean) osBean;

        Gauge.builder("system.cpu.usage",
                        extendedBean,
                        com.sun.management.OperatingSystemMXBean::getProcessCpuLoad)
                .register(registry);

        // Usage is what is in use, not what is installed: total minus free.
        Gauge.builder("system.memory.usage",
                        extendedBean,
                        bean -> (double) (bean.getTotalPhysicalMemorySize()
                                - bean.getFreePhysicalMemorySize()))
                .description("Physical memory in use, in bytes")
                .register(registry);
    }
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logback configuration: INFO-and-above events go to the console and to a rolling file. -->
<configuration>
<!-- Console appender; the timestamp carries the time of day only. -->
<appender name="CONSOLE"
class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>
%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
</pattern>
</encoder>
</appender>
<!-- File appender; the active file is logs/application.log. -->
<appender name="FILE"
class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/application.log</file>
<!-- Time-based rollover: the yyyy-MM-dd date pattern yields one archive file per day. -->
<rollingPolicy
class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>
logs/application-%d{yyyy-MM-dd}.log
</fileNamePattern>
<!-- Archive retention limit: 30 rollover periods. -->
<maxHistory>30</maxHistory>
</rollingPolicy>
<encoder>
<!-- Same layout as the console, but with the full date in the timestamp. -->
<pattern>
%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
</pattern>
</encoder>
</appender>
<!-- Root logger: INFO threshold, attached to both appenders. -->
<root level="INFO">
<appender-ref ref="CONSOLE" />
<appender-ref ref="FILE" />
</root>
</configuration>
/**
 * Logging helpers: MDC-enriched operation logging and an around-advice that logs entry, exit
 * and duration of methods annotated with {@code Monitored}.
 *
 * <p>The class is declared as an {@code @Aspect}; without that annotation the {@code @Around}
 * advice below is never applied.
 */
@Slf4j
@Aspect
@Service
public class LoggingService {

    private static final String MDC_OPERATION = "operation";
    private static final String MDC_TRACE_ID = "traceId";

    /**
     * Logs the start and completion (or failure) of an operation with the operation name and
     * trace id available in the MDC.
     *
     * <p>Only the two MDC keys set here are removed afterwards, so MDC entries placed by callers
     * further up the stack survive this call.
     *
     * <p>NOTE(review): {@code getTraceId()} is not declared in this class as shown.
     *
     * @param operation name of the operation, logged and stored under the {@code operation} key
     * @param context additional data, serialized to JSON in the start message
     */
    public void logOperation(String operation,
            Map<?, ?> context) {
        MDC.put(MDC_OPERATION, operation);
        MDC.put(MDC_TRACE_ID, getTraceId());
        try {
            log.info("Operation started: {}",
                    JsonUtils.toJson(context));
            // Perform operation
            log.info("Operation completed: {}", operation);
        } catch (Exception e) {
            log.error("Operation failed: {}", operation, e);
            throw e;
        } finally {
            MDC.remove(MDC_OPERATION);
            MDC.remove(MDC_TRACE_ID);
        }
    }

    /**
     * Around-advice that logs when the intercepted method starts, how long it took when it
     * completes, and the exception when it fails.
     *
     * @param joinPoint the intercepted invocation
     * @return whatever the intercepted method returns
     * @throws Throwable whatever the intercepted method throws, rethrown unchanged
     */
    @Around("@annotation(Monitored)")
    public Object logMethod(ProceedingJoinPoint joinPoint)
            throws Throwable {
        String methodName = joinPoint.getSignature().getName();
        // nanoTime is monotonic; wall-clock time can jump while the method is running.
        long startNanos = System.nanoTime();
        try {
            log.info("Method {} started", methodName);
            Object result = joinPoint.proceed();
            log.info("Method {} completed in {}ms",
                    methodName,
                    (System.nanoTime() - startNanos) / 1_000_000L);
            return result;
        } catch (Exception e) {
            log.error("Method {} failed", methodName, e);
            throw e;
        }
    }
}
/**
 * Spring configuration that contributes a database health indicator.
 */
@Configuration
public class HealthConfig {

    /**
     * Exposes the database health indicator bean.
     *
     * @return an indicator whose {@code health()} delegates to {@link #probeDatabase()}
     */
    @Bean
    public HealthIndicator databaseHealth() {
        return this::probeDatabase;
    }

    /**
     * Produces the database health: UP with the database name and a connected status, or DOWN
     * carrying the exception if the check throws.
     */
    private Health probeDatabase() {
        Health outcome;
        try {
            // Check database connection
            outcome = Health.up()
                    .withDetail("database", "PostgreSQL")
                    .withDetail("status", "Connected")
                    .build();
        } catch (Exception failure) {
            outcome = Health.down()
                    .withException(failure)
                    .build();
        }
        return outcome;
    }
}
/**
 * Health indicator that reports the status of an {@link ExternalService}.
 */
@Component
public class CustomHealthIndicator implements HealthIndicator {

    private final ExternalService service;

    /**
     * Creates the indicator.
     *
     * @param service the external service whose status is checked; must not be null
     */
    public CustomHealthIndicator(ExternalService service) {
        this.service = java.util.Objects.requireNonNull(service, "service");
    }

    /**
     * Queries the external service and reports UP with its name, state and the time of the
     * check, or DOWN with the exception if the status call throws.
     *
     * @return the resulting health
     */
    @Override
    public Health health() {
        try {
            ServiceStatus status = service.checkStatus();
            return Health.up()
                    .withDetail("service", status.getName())
                    .withDetail("status", status.getState())
                    .withDetail("lastChecked",
                            Instant.now().toString())
                    .build();
        } catch (Exception e) {
            return Health.down()
                    .withException(e)
                    .build();
        }
    }
}
/**
 * Runs every registered {@link HealthCheck} and aggregates the outcomes into a single
 * {@link HealthStatus}.
 */
@Service
public class HealthCheckService {

    private final List<HealthCheck> healthChecks;

    /**
     * Creates the service.
     *
     * @param healthChecks the checks to run, in iteration order; must not be null
     */
    public HealthCheckService(List<HealthCheck> healthChecks) {
        this.healthChecks = java.util.Objects.requireNonNull(healthChecks, "healthChecks");
    }

    /**
     * Executes all checks.
     *
     * <p>A check that throws is recorded as a failed result and makes the overall status
     * unhealthy; it does not stop the remaining checks from running.
     *
     * @return the overall status: healthy only if every check reported healthy, together with
     *     the per-check results keyed by check name
     */
    public HealthStatus checkSystem() {
        // NOTE(review): assumes HealthCheck.getName() returns String - confirm.
        Map<String, HealthResult> results = new HashMap<>();
        boolean isHealthy = true;
        for (HealthCheck check : healthChecks) {
            try {
                HealthResult result = check.execute();
                results.put(check.getName(), result);
                isHealthy &= result.isHealthy();
            } catch (Exception e) {
                results.put(check.getName(),
                        HealthResult.failed(e));
                isHealthy = false;
            }
        }
        return new HealthStatus(isHealthy, results);
    }
}
/**
 * Health check that compares the JVM's used heap with the maximum the JVM will attempt to use.
 */
@Component
public class MemoryHealthCheck implements HealthCheck {

    /** Usage, in percent of the maximum, at or above which the check reports unhealthy. */
    private static final double UNHEALTHY_USAGE_PERCENT = 90.0;

    /**
     * Measures current memory usage.
     *
     * <p>NOTE(review): {@code formatBytes} is not declared in this class as shown.
     *
     * @return a result for component {@code Memory} that is healthy while usage is below the
     *     threshold, with {@code usage}, {@code max} and {@code used} details
     */
    @Override
    public HealthResult execute() {
        Runtime runtime = Runtime.getRuntime();
        long maxMemory = runtime.maxMemory();
        // Used = currently allocated by the JVM minus the free part of that allocation.
        long usedMemory = runtime.totalMemory() -
                runtime.freeMemory();
        double memoryUsage =
                (double) usedMemory / maxMemory * 100;
        boolean healthy = memoryUsage < UNHEALTHY_USAGE_PERCENT;
        return HealthResult.builder()
                .healthy(healthy)
                .component("Memory")
                // Fixed locale so the decimal separator does not vary with the host's settings.
                .detail("usage", String.format(java.util.Locale.ROOT, "%.2f%%",
                        memoryUsage))
                .detail("max", formatBytes(maxMemory))
                .detail("used", formatBytes(usedMemory))
                .build();
    }
}
/**
 * Spring configuration for distributed tracing.
 */
@Configuration
public class TracingConfig {

    /**
     * Creates the tracer for the local service {@code my-service}, reporting spans through the
     * {@link #spanReporter()} bean.
     *
     * @return the tracer obtained from the built {@link Tracing} instance
     */
    @Bean
    public Tracer tracer() {
        Tracing tracing = Tracing.newBuilder()
                .localServiceName("my-service")
                .spanReporter(spanReporter())
                .build();
        return tracing.tracer();
    }

    /**
     * Creates the reporter that finished spans are handed to.
     *
     * @return a new {@link ZipkinSpanReporter}
     */
    @Bean
    public SpanReporter spanReporter() {
        SpanReporter reporter = new ZipkinSpanReporter();
        return reporter;
    }
}
/**
 * Example service that wraps an operation in a manually managed span.
 */
@Service
public class TracedService {

    private final Tracer tracer;

    /**
     * Creates the service.
     *
     * @param tracer the tracer spans are created from; must not be null
     */
    public TracedService(Tracer tracer) {
        this.tracer = java.util.Objects.requireNonNull(tracer, "tracer");
    }

    /**
     * Runs the operation inside a span named {@code operation}.
     *
     * <p>The span is tagged {@code result=success} on normal completion and {@code error=...}
     * on failure, and is always finished.
     */
    public void performOperation() {
        Span span = tracer.nextSpan()
                .name("operation")
                .tag("custom.tag", "value");
        // Start the span before it is placed in scope so the timing covers the scoped work.
        span.start();
        try (SpanInScope ws = tracer.withSpanInScope(span)) {
            // Business logic
            span.tag("result", "success");
        } catch (RuntimeException | Error e) {
            span.tag("error", describe(e));
            throw e;
        } finally {
            span.finish();
        }
    }

    /**
     * Returns a non-null description of the failure: its message, or its simple class name when
     * it has no message. A null tag value would otherwise fail while the original exception is
     * being handled and replace it.
     */
    private static String describe(Throwable failure) {
        String message = failure.getMessage();
        return message != null ? message : failure.getClass().getSimpleName();
    }
}
/**
 * Aspect that wraps every method annotated with {@code Traced} in its own span.
 */
@Aspect
@Component
public class TracingAspect {

    private final Tracer tracer;

    /**
     * Creates the aspect.
     *
     * @param tracer the tracer spans are created from; must not be null
     */
    public TracingAspect(Tracer tracer) {
        this.tracer = java.util.Objects.requireNonNull(tracer, "tracer");
    }

    /**
     * Runs the intercepted method inside a span named after the method and tagged with the
     * target's class name.
     *
     * <p>The span is tagged {@code result=success} on normal return, or {@code error} and
     * {@code error.type} on any failure, and is always finished.
     *
     * @param joinPoint the intercepted invocation
     * @return whatever the intercepted method returns
     * @throws Throwable whatever the intercepted method throws, rethrown unchanged
     */
    @Around("@annotation(Traced)")
    public Object traceMethod(ProceedingJoinPoint joinPoint)
            throws Throwable {
        String methodName = joinPoint.getSignature().getName();
        Span span = tracer.nextSpan()
                .name(methodName)
                .tag("class",
                        joinPoint.getTarget().getClass().getName());
        // Start the span before it is placed in scope so the timing covers the scoped work.
        span.start();
        try (SpanInScope ws = tracer.withSpanInScope(span)) {
            Object result = joinPoint.proceed();
            span.tag("result", "success");
            return result;
        } catch (Throwable e) {
            String type = e.getClass().getSimpleName();
            String message = e.getMessage();
            // A null tag value would fail here and replace the exception being handled, so
            // fall back to the type name when the failure carries no message.
            span.tag("error", message != null ? message : type);
            span.tag("error.type", type);
            throw e;
        } finally {
            span.finish();
        }
    }
}
/**
 * Spring configuration that exposes the application's {@link AlertManager}.
 */
@Configuration
public class AlertConfig {
/**
 * Builds an alert manager with an email, a Slack and a PagerDuty notifier attached.
 *
 * <p>NOTE(review): {@code emailConfig()}, {@code slackConfig()} and {@code pagerDutyConfig()}
 * are not declared in this class as shown; they must be supplied before this compiles.
 *
 * @return the configured alert manager
 */
@Bean
public AlertManager alertManager() {
return AlertManager.builder()
.withEmailNotifier(emailConfig())
.withSlackNotifier(slackConfig())
.withPagerDutyNotifier(pagerDutyConfig())
.build();
}
}
/**
 * Evaluates alert rules against meters in the {@link MeterRegistry} and sends alerts through
 * the {@link AlertManager}.
 *
 * <p>The rules are evaluated by looking the meters up and comparing their current values, so
 * {@link #configureAlerts()} has to be invoked periodically (for example from a scheduler).
 * The class keeps the previous error total between passes and is not synchronized; invoke it
 * from a single thread.
 */
@Service
public class AlertService {

    private static final String CPU_USAGE_METER = "system.cpu.usage";
    private static final String ERRORS_METER = "errors.total";

    /** CPU usage, in percent, above which a HIGH alert is sent. */
    private static final double CPU_USAGE_THRESHOLD_PERCENT = 90.0;

    /** Number of new errors between two passes above which a CRITICAL alert is sent. */
    private static final double ERROR_COUNT_THRESHOLD = 100.0;

    private final AlertManager alertManager;
    private final MeterRegistry registry;

    /** Sum of all {@code errors.total} counters seen on the previous pass. */
    private double lastErrorTotal;

    /**
     * Creates the service.
     *
     * @param alertManager destination for alerts; must not be null
     * @param registry registry the monitored meters are looked up in; must not be null
     */
    public AlertService(AlertManager alertManager, MeterRegistry registry) {
        this.alertManager = java.util.Objects.requireNonNull(alertManager, "alertManager");
        this.registry = java.util.Objects.requireNonNull(registry, "registry");
    }

    /**
     * Runs one evaluation pass over all alert rules and sends an alert for each rule whose
     * threshold is exceeded.
     */
    public void configureAlerts() {
        checkCpuUsage();
        checkErrorRate();
    }

    /** Sends a HIGH alert when the CPU usage gauge is above the threshold. */
    private void checkCpuUsage() {
        Gauge cpuGauge = registry.find(CPU_USAGE_METER).gauge();
        if (cpuGauge == null) {
            // Gauge not registered (yet): nothing to evaluate.
            return;
        }
        // NOTE(review): assumes the gauge reports a 0..1 fraction, as the process CPU load
        // registered under this name elsewhere in this guide does - confirm for other sources.
        double value = cpuGauge.value() * 100.0;
        // A NaN reading (no value available) compares false and therefore never alerts.
        if (value > CPU_USAGE_THRESHOLD_PERCENT) {
            alertManager.sendAlert(
                    Alert.builder()
                            .severity(Severity.HIGH)
                            .message("High CPU usage: " + value)
                            .build()
            );
        }
    }

    /**
     * Sends a CRITICAL alert when the error counters grew by more than the threshold since the
     * previous pass. All counters named {@code errors.total} are summed regardless of tags.
     */
    private void checkErrorRate() {
        double total = 0.0;
        for (Counter counter : registry.find(ERRORS_METER).counters()) {
            total += counter.count();
        }
        double value = total - lastErrorTotal;
        lastErrorTotal = total;
        if (value > ERROR_COUNT_THRESHOLD) {
            alertManager.sendAlert(
                    Alert.builder()
                            .severity(Severity.CRITICAL)
                            .message("High error rate: " + value)
                            .build()
            );
        }
    }
}
Effective monitoring is crucial for maintaining healthy Java applications in production. By implementing comprehensive monitoring strategies and following best practices, you can ensure better application reliability and performance.
Remember to regularly review and update your monitoring setup based on your application's evolving needs and requirements.