Server : Apache System : Linux iad1-shared-b8-43 6.6.49-grsec-jammy+ #10 SMP Thu Sep 12 23:23:08 UTC 2024 x86_64 User : dh_edsupp ( 6597262) PHP Version : 8.2.26 Disable Function : NONE Directory : /opt/prometheus-monitoring-scripts/lib/python3.10/site-packages/custom_exporters/dphactl/ |
Upload File : |
"""
Verify Module - Handles verification checks.

Used for verifying standby container configuration and reporting problems
found by ``dp-ha-ctl verify`` as metrics.
"""

import re
import sys
from collections import Counter

from ..base import BaseExporter
from ..utils import run_command_silent, parse_json_silent, debug_log

# Message fragments recognised in problem texts, mapped to the stable value
# used for the "problem" label. Checked in order; the first match wins.
_KNOWN_PROBLEMS = (
    ("nginx.conf does not exist", "missing_nginx_conf"),
    ("vhosts/*.conf does not exist", "missing_vhost_conf"),
    ("php-fpm.d/*.conf does not exist", "missing_php_fpm_conf"),
    ("systemd unit is not enabled", "systemd_unit_not_enabled"),
)

# Upper bound on the length of a derived (non-predefined) problem category.
_MAX_CATEGORY_LENGTH = 50

# Patterns used to normalise free-form problem text into an identifier-like
# token; compiled once at import time.
_NON_IDENTIFIER_CHARS = re.compile(r"[^a-z0-9_]")
_UNDERSCORE_RUNS = re.compile(r"_+")


class CustomExporter(BaseExporter):
    """
    Exporter for verification checks.

    Runs ``dp-ha-ctl verify`` and reports metrics on the problems found.
    """

    def metric_config(self):
        """Return the definitions of the metrics this exporter publishes.

        Returns:
            dict: Maps each metric name to its type and label names:

            * ``dp_hactl_verify_problems_total`` - number of problems
              reported for a machine.
            * ``dp_hactl_verify_problem_details`` - number of problems of
              one category reported for a machine.
        """
        return {
            "dp_hactl_verify_problems_total": {
                "type": "Gauge",
                "labels": ["machine"],
            },
            "dp_hactl_verify_problem_details": {
                "type": "Gauge",
                "labels": ["machine", "problem"],
            },
        }

    def generate(self):
        """Run the verification check and populate the metrics.

        Any exception raised by the check terminates the process with exit
        status 1. The failure is reported through ``debug_log`` first so it
        can be diagnosed.
        """
        try:
            self._check_verify()
        except Exception as exc:
            # Deliberate best-effort boundary: never let a traceback escape.
            # NOTE(review): debug_log is assumed to be quiet unless debugging
            # is enabled, as for the other calls in this module - confirm.
            debug_log(f"dp-ha-ctl verify check failed: {exc!r}")
            sys.exit(1)

    def _categorize_problem(self, problem_text):
        """Map a problem message to a short category string.

        Messages containing a known fragment get a predefined category.
        Anything else is lower-cased and reduced to ``[a-z0-9_]`` characters,
        which gives a compact, stable value for the "problem" label.

        Args:
            problem_text: The problem message (a string).

        Returns:
            str: A predefined category, a sanitised form of the message of at
            most 50 characters, or ``"unknown_problem"`` for an empty message.
        """
        for fragment, category in _KNOWN_PROBLEMS:
            if fragment in problem_text:
                return category

        sanitized = _NON_IDENTIFIER_CHARS.sub("_", problem_text.lower())
        if not sanitized:
            return "unknown_problem"

        # Only [a-z0-9_] remain here, so a leading digit is the only way the
        # token can fail to start with a letter or underscore.
        if sanitized[0].isdigit():
            sanitized = "p_" + sanitized

        # Collapse underscore runs left behind by replaced punctuation or
        # whitespace, then cap the length.
        sanitized = _UNDERSCORE_RUNS.sub("_", sanitized)
        return sanitized[:_MAX_CATEGORY_LENGTH]

    def _check_verify(self):
        """Run ``dp-ha-ctl verify`` and set the metrics from its JSON output.

        When the command prints ``All clear`` or ``[]`` no metric is set.
        Otherwise each parsed entry is read for its ``container`` key
        (default ``"unknown"``) and its ``problems`` list (default empty).
        The per-machine total and the per-category counts are set from them.
        """
        total_gauge = self.metrics.get("dp_hactl_verify_problems_total")
        details_gauge = self.metrics.get("dp_hactl_verify_problem_details")

        debug_log("Running dp-ha-ctl verify check")

        # NOTE(review): run_command_silent presumably deals with command
        # failures itself, so reaching the next line implies success -
        # confirm against ..utils.
        result = run_command_silent(
            ["dp-ha-ctl", "--format", "json", "verify"],
            timeout=30,
        )

        if result.stdout.strip() in ("All clear", "[]"):
            debug_log("No problems found in verify check")
            # No problems found - skip setting any metric entirely.
            return

        problems = parse_json_silent(result.stdout)

        for entry in problems:
            machine = entry.get("container", "unknown")
            problem_list = entry.get("problems", [])

            # Total number of problems for this machine.
            total_gauge.labels(machine=machine).set(len(problem_list))

            # Number of problems per category for this machine.
            category_counts = Counter(
                self._categorize_problem(text) for text in problem_list
            )
            for category, count in category_counts.items():
                details_gauge.labels(
                    machine=machine,
                    problem=category,
                ).set(count)