AutoDmp.cpp 16 KB


  #include "AutoDmp.h"

  #include <dirent.h>
  #include <dlfcn.h>
  #include <errno.h>
  #include <sys/resource.h>
  #include <sys/stat.h>
  #include <sys/types.h>
  #include <unistd.h>

  #include <algorithm>
  #include <atomic>
  #include <chrono>
  #include <climits>
  #include <cstdint>
  #include <cstring>
  #include <ctime>
  #include <fstream>
  #include <iomanip>
  #include <iostream>
  #include <sstream>
  #include <vector>
  16. // 静态成员初始化
  17. AutoDmp* AutoDmp::instance_ = nullptr;
  18. // Linux 信号处理列表
  19. const int AutoDmp::handledSignals_[] = {
  20. SIGSEGV, // 段错误
  21. SIGABRT, // 异常终止
  22. SIGFPE, // 浮点异常
  23. SIGILL, // 非法指令
  24. SIGBUS, // 总线错误
  25. SIGSYS, // 系统调用错误
  26. SIGPIPE, // 管道错误
  27. SIGQUIT // 退出信号
  28. };
  29. const size_t AutoDmp::handledSignalCount_ = sizeof(handledSignals_) / sizeof(handledSignals_[0]);
  30. // 构造函数
  31. AutoDmp::AutoDmp(const CrashConfig& config) : config_(config) {
  32. if (instance_ == nullptr) {
  33. instance_ = this;
  34. if (initializeHandlers()) {
  35. initialized_.store(true);
  36. }
  37. }
  38. }
  39. // 析构函数
  40. AutoDmp::~AutoDmp() {
  41. if (instance_ == this) {
  42. cleanupHandlers();
  43. instance_ = nullptr;
  44. initialized_.store(false);
  45. }
  46. }
  47. // Linux 信号处理器
  48. void AutoDmp::signalHandler(int sig, siginfo_t* info, void* context) {
  49. // 防止重入
  50. static volatile sig_atomic_t handling = 0;
  51. if (handling || !instance_ || instance_->crashInProgress_.exchange(true)) {
  52. return;
  53. }
  54. handling = 1;
  55. const CrashConfig* config = getInstanceConfig();
  56. if (!config) {
  57. // 恢复默认处理并重新触发
  58. signal(sig, SIG_DFL);
  59. raise(sig);
  60. return;
  61. }
  62. // 使用异步安全的方式输出基本信息
  63. if (config->enableConsoleOutput) {
  64. safeWriteToFd(STDERR_FILENO, "\n*** LINUX CRASH DETECTED ***\n");
  65. safeWriteToFd(STDERR_FILENO, "Signal: ");
  66. safeWriteNumber(STDERR_FILENO, sig);
  67. safeWriteToFd(STDERR_FILENO, " (");
  68. safeWriteToFd(STDERR_FILENO, getSignalName(sig).c_str());
  69. safeWriteToFd(STDERR_FILENO, ")\n");
  70. if (info->si_addr) {
  71. safeWriteToFd(STDERR_FILENO, "Fault Address: ");
  72. safeWritePointer(STDERR_FILENO, info->si_addr);
  73. safeWriteToFd(STDERR_FILENO, "\n");
  74. }
  75. safeWriteToFd(STDERR_FILENO, "Process ID: ");
  76. safeWriteNumber(STDERR_FILENO, getpid());
  77. safeWriteToFd(STDERR_FILENO, "\n");
  78. safeWriteToFd(STDERR_FILENO, "Thread ID: ");
  79. safeWriteNumber(STDERR_FILENO, syscall(SYS_gettid));
  80. safeWriteToFd(STDERR_FILENO, "\n");
  81. // 输出调用栈到stderr(异步安全)
  82. if (config->enableBacktrace) {
  83. safeWriteToFd(STDERR_FILENO, "\nBacktrace:\n");
  84. void* frames[128];
  85. int numFrames = backtrace(frames, 128);
  86. if (numFrames > 0) {
  87. backtrace_symbols_fd(frames, numFrames, STDERR_FILENO);
  88. }
  89. }
  90. safeWriteToFd(STDERR_FILENO, "****************************\n\n");
  91. }
  92. // 设置 core dump
  93. if (config->enableCoreDump) {
  94. setupCoreDump(config->crashDirectory);
  95. }
  96. // 恢复默认信号处理并重新触发
  97. struct sigaction sa;
  98. sa.sa_handler = SIG_DFL;
  99. sigemptyset(&sa.sa_mask);
  100. sa.sa_flags = 0;
  101. sigaction(sig, &sa, nullptr);
  102. // 如果不是在异步安全模式,尝试写入详细日志
  103. if (!config->asyncSafeOnly) {
  104. try {
  105. // 收集详细崩溃信息
  106. CrashInfo crashInfo;
  107. crashInfo.timestamp = getCurrentTimestamp();
  108. crashInfo.processName = getProcessName();
  109. crashInfo.processId = getpid();
  110. crashInfo.threadId = syscall(SYS_gettid);
  111. crashInfo.buildInfo = std::string(__DATE__) + " " + __TIME__;
  112. crashInfo.signalNumber = sig;
  113. crashInfo.signalName = getSignalName(sig);
  114. crashInfo.faultAddress = info->si_addr;
  115. crashInfo.signalDescription = getSignalDescription(sig, info);
  116. crashInfo.backtrace = getBacktrace(context, config->maxBacktraceDepth);
  117. // 确保目录存在
  118. createDirectory(config->crashDirectory);
  119. // 写入详细日志
  120. if (config->enableLogFile) {
  121. std::string logFileName = "crash_" + crashInfo.timestamp + ".log";
  122. std::string logPath = config->crashDirectory + "/" + logFileName;
  123. writeCrashLog(crashInfo, logPath);
  124. cleanupOldFiles(config->crashDirectory, config->maxCrashFiles);
  125. }
  126. // 输出详细信息到控制台
  127. if (config->enableDetailedInfo && config->enableConsoleOutput) {
  128. outputDetailedInfo(crashInfo);
  129. }
  130. } catch (...) {
  131. // 在信号处理器中不抛出异常
  132. safeWriteToFd(STDERR_FILENO, "Warning: Failed to write detailed crash log\n");
  133. }
  134. }
  135. // 触发默认处理(生成core dump并终止进程)
  136. raise(sig);
  137. }
  138. // 异步安全的写入函数
  139. void AutoDmp::safeWriteToFd(int fd, const char* msg) {
  140. if (msg) {
  141. size_t len = strlen(msg);
  142. write(fd, msg, len);
  143. }
  144. }
  145. void AutoDmp::safeWriteNumber(int fd, long number) {
  146. char buffer[32];
  147. char* p = buffer + sizeof(buffer) - 1;
  148. *p = '\0';
  149. bool negative = number < 0;
  150. if (negative) number = -number;
  151. if (number == 0) {
  152. *--p = '0';
  153. } else {
  154. while (number > 0) {
  155. *--p = '0' + (number % 10);
  156. number /= 10;
  157. }
  158. }
  159. if (negative) {
  160. *--p = '-';
  161. }
  162. safeWriteToFd(fd, p);
  163. }
  164. void AutoDmp::safeWritePointer(int fd, void* ptr) {
  165. char buffer[32];
  166. snprintf(buffer, sizeof(buffer), "%p", ptr);
  167. safeWriteToFd(fd, buffer);
  168. }
  169. // 获取调用栈
  170. std::string AutoDmp::getBacktrace(void* context, size_t maxDepth) {
  171. void* frames[maxDepth];
  172. int numFrames = backtrace(frames, maxDepth);
  173. if (numFrames <= 0) {
  174. return "Unable to get backtrace";
  175. }
  176. // 获取符号信息
  177. char** symbols = backtrace_symbols(frames, numFrames);
  178. if (!symbols) {
  179. return "Unable to get symbol information";
  180. }
  181. std::ostringstream oss;
  182. for (int i = 0; i < numFrames; ++i) {
  183. oss << "#" << std::setw(2) << std::setfill('0') << i
  184. << ": " << symbols[i] << "\n";
  185. }
  186. free(symbols);
  187. return oss.str();
  188. }
  189. // 设置 core dump
  190. bool AutoDmp::setupCoreDump(const std::string& crashDirectory) {
  191. // 设置 core dump 大小限制
  192. struct rlimit coreLimit;
  193. coreLimit.rlim_cur = RLIM_INFINITY;
  194. coreLimit.rlim_max = RLIM_INFINITY;
  195. if (setrlimit(RLIMIT_CORE, &coreLimit) != 0) {
  196. return false;
  197. }
  198. // 设置 core 文件路径和命名格式
  199. if (!crashDirectory.empty()) {
  200. // 构造 core_pattern: /path/to/crashes/core.%e.%p.%t
  201. // %e = 执行文件名, %p = PID, %t = 时间戳
  202. std::string corePattern = crashDirectory + "/core.%e.%p.%t";
  203. // 写入 /proc/sys/kernel/core_pattern
  204. std::ofstream corePatternFile("/proc/sys/kernel/core_pattern");
  205. if (corePatternFile.is_open()) {
  206. corePatternFile << corePattern;
  207. corePatternFile.close();
  208. return true;
  209. }
  210. }
  211. return true; // 即使设置路径失败,至少启用了 core dump
  212. }
  213. // 获取信号名
  214. std::string AutoDmp::getSignalName(int sig) {
  215. switch (sig) {
  216. case SIGSEGV: return "SIGSEGV";
  217. case SIGABRT: return "SIGABRT";
  218. case SIGFPE: return "SIGFPE";
  219. case SIGILL: return "SIGILL";
  220. case SIGBUS: return "SIGBUS";
  221. case SIGSYS: return "SIGSYS";
  222. case SIGPIPE: return "SIGPIPE";
  223. case SIGQUIT: return "SIGQUIT";
  224. default: return "UNKNOWN";
  225. }
  226. }
  227. // 获取信号描述
  228. std::string AutoDmp::getSignalDescription(int sig, siginfo_t* info) {
  229. std::ostringstream oss;
  230. oss << getSignalName(sig) << " - ";
  231. switch (sig) {
  232. case SIGSEGV:
  233. oss << "Segmentation fault";
  234. if (info) {
  235. switch (info->si_code) {
  236. case SEGV_MAPERR: oss << " (address not mapped)"; break;
  237. case SEGV_ACCERR: oss << " (invalid permissions)"; break;
  238. default: break;
  239. }
  240. }
  241. break;
  242. case SIGABRT:
  243. oss << "Abort signal";
  244. break;
  245. case SIGFPE:
  246. oss << "Floating point exception";
  247. if (info) {
  248. switch (info->si_code) {
  249. case FPE_INTDIV: oss << " (integer divide by zero)"; break;
  250. case FPE_INTOVF: oss << " (integer overflow)"; break;
  251. case FPE_FLTDIV: oss << " (floating point divide by zero)"; break;
  252. case FPE_FLTOVF: oss << " (floating point overflow)"; break;
  253. case FPE_FLTUND: oss << " (floating point underflow)"; break;
  254. case FPE_FLTRES: oss << " (floating point inexact result)"; break;
  255. case FPE_FLTINV: oss << " (floating point invalid operation)"; break;
  256. default: break;
  257. }
  258. }
  259. break;
  260. case SIGILL:
  261. oss << "Illegal instruction";
  262. if (info) {
  263. switch (info->si_code) {
  264. case ILL_ILLOPC: oss << " (illegal opcode)"; break;
  265. case ILL_ILLOPN: oss << " (illegal operand)"; break;
  266. case ILL_ILLADR: oss << " (illegal addressing mode)"; break;
  267. case ILL_ILLTRP: oss << " (illegal trap)"; break;
  268. default: break;
  269. }
  270. }
  271. break;
  272. case SIGBUS:
  273. oss << "Bus error";
  274. if (info) {
  275. switch (info->si_code) {
  276. case BUS_ADRALN: oss << " (invalid address alignment)"; break;
  277. case BUS_ADRERR: oss << " (non-existent physical address)"; break;
  278. case BUS_OBJERR: oss << " (object specific hardware error)"; break;
  279. default: break;
  280. }
  281. }
  282. break;
  283. case SIGSYS:
  284. oss << "Bad system call";
  285. break;
  286. case SIGPIPE:
  287. oss << "Broken pipe";
  288. break;
  289. case SIGQUIT:
  290. oss << "Quit signal";
  291. break;
  292. default:
  293. oss << "Unknown signal (" << sig << ")";
  294. break;
  295. }
  296. return oss.str();
  297. }
  298. // 获取当前时间戳
  299. std::string AutoDmp::getCurrentTimestamp() {
  300. auto now = std::chrono::system_clock::now();
  301. auto time_t = std::chrono::system_clock::to_time_t(now);
  302. auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
  303. now.time_since_epoch()) % 1000;
  304. std::tm* tm = std::localtime(&time_t);
  305. std::ostringstream oss;
  306. oss << std::put_time(tm, "%Y%m%d-%H%M%S")
  307. << "." << std::setfill('0') << std::setw(3) << ms.count();
  308. return oss.str();
  309. }
  310. // 获取进程名
  311. std::string AutoDmp::getProcessName() {
  312. std::string processName = "Unknown";
  313. char buffer[PATH_MAX];
  314. ssize_t len = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1);
  315. if (len > 0) {
  316. buffer[len] = '\0';
  317. std::string fullPath = buffer;
  318. size_t pos = fullPath.find_last_of('/');
  319. if (pos != std::string::npos) {
  320. processName = fullPath.substr(pos + 1);
  321. }
  322. }
  323. return processName;
  324. }
  325. // 创建目录
  326. bool AutoDmp::createDirectory(const std::string& path) {
  327. if (path.empty()) return false;
  328. return (mkdir(path.c_str(), 0755) == 0) || (errno == EEXIST);
  329. }
  330. // 清理旧文件
  331. void AutoDmp::cleanupOldFiles(const std::string& directory, size_t maxFiles) {
  332. if (maxFiles == 0) return;
  333. std::vector<std::pair<std::string, std::time_t>> logFiles;
  334. std::vector<std::pair<std::string, std::time_t>> coreFiles;
  335. DIR* dir = opendir(directory.c_str());
  336. if (dir) {
  337. struct dirent* entry;
  338. while ((entry = readdir(dir)) != nullptr) {
  339. std::string filename = entry->d_name;
  340. std::string fullPath = directory + "/" + filename;
  341. struct stat fileStat;
  342. // 收集 crash log 文件
  343. if (filename.find("crash_") == 0 && filename.find(".log") != std::string::npos) {
  344. if (stat(fullPath.c_str(), &fileStat) == 0) {
  345. logFiles.emplace_back(fullPath, fileStat.st_mtime);
  346. }
  347. }
  348. // 收集 core dump 文件 (core.* 格式)
  349. else if (filename.find("core.") == 0) {
  350. if (stat(fullPath.c_str(), &fileStat) == 0) {
  351. coreFiles.emplace_back(fullPath, fileStat.st_mtime);
  352. }
  353. }
  354. }
  355. closedir(dir);
  356. }
  357. // 清理 log 文件
  358. std::sort(logFiles.begin(), logFiles.end(),
  359. [](const auto& a, const auto& b) { return a.second > b.second; });
  360. for (size_t i = maxFiles; i < logFiles.size(); ++i) {
  361. std::remove(logFiles[i].first.c_str());
  362. }
  363. // 清理 core 文件
  364. std::sort(coreFiles.begin(), coreFiles.end(),
  365. [](const auto& a, const auto& b) { return a.second > b.second; });
  366. for (size_t i = maxFiles; i < coreFiles.size(); ++i) {
  367. std::remove(coreFiles[i].first.c_str());
  368. }
  369. }
  370. // 写入崩溃日志
  371. void AutoDmp::writeCrashLog(const CrashInfo& crashInfo, const std::string& filePath) {
  372. std::ofstream logFile(filePath, std::ios::app);
  373. if (!logFile.is_open()) return;
  374. logFile << "==================== LINUX CRASH REPORT ====================\n";
  375. logFile << "Timestamp: " << crashInfo.timestamp << "\n";
  376. logFile << "Process: " << crashInfo.processName << " (PID: " << crashInfo.processId << ")\n";
  377. logFile << "Thread ID: " << crashInfo.threadId << "\n";
  378. logFile << "Build Info: " << crashInfo.buildInfo << "\n";
  379. logFile << "Signal: " << crashInfo.signalName << " (" << crashInfo.signalNumber << ")\n";
  380. logFile << "Signal Description: " << crashInfo.signalDescription << "\n";
  381. logFile << "Fault Address: " << crashInfo.faultAddress << "\n";
  382. logFile << "\nDetailed Backtrace:\n";
  383. logFile << crashInfo.backtrace << "\n";
  384. logFile << "============================================================\n\n";
  385. logFile.close();
  386. }
  387. // 输出详细信息
  388. void AutoDmp::outputDetailedInfo(const CrashInfo& crashInfo) {
  389. std::cerr << "\n========== DETAILED CRASH INFORMATION ==========\n";
  390. std::cerr << "Timestamp: " << crashInfo.timestamp << "\n";
  391. std::cerr << "Process: " << crashInfo.processName << " (PID: " << crashInfo.processId << ")\n";
  392. std::cerr << "Thread: " << crashInfo.threadId << "\n";
  393. std::cerr << "Build: " << crashInfo.buildInfo << "\n";
  394. std::cerr << "Signal: " << crashInfo.signalDescription << "\n";
  395. std::cerr << "Address: " << crashInfo.faultAddress << "\n";
  396. std::cerr << "\nBacktrace:\n" << crashInfo.backtrace;
  397. std::cerr << "===============================================\n\n";
  398. }
  399. // 初始化处理器
  400. bool AutoDmp::initializeHandlers() {
  401. if (!createDirectory(config_.crashDirectory)) {
  402. return false;
  403. }
  404. // 设置 core dump
  405. if (config_.enableCoreDump) {
  406. setupCoreDump(config_.crashDirectory);
  407. }
  408. // 安装信号处理器
  409. struct sigaction sa;
  410. sa.sa_sigaction = signalHandler;
  411. sigemptyset(&sa.sa_mask);
  412. sa.sa_flags = SA_SIGINFO | SA_RESTART;
  413. bool success = true;
  414. for (size_t i = 0; i < handledSignalCount_; ++i) {
  415. if (sigaction(handledSignals_[i], &sa, &oldActions_[i]) != 0) {
  416. success = false;
  417. }
  418. }
  419. return success;
  420. }
  421. // 清理处理器
  422. void AutoDmp::cleanupHandlers() {
  423. for (size_t i = 0; i < handledSignalCount_; ++i) {
  424. sigaction(handledSignals_[i], &oldActions_[i], nullptr);
  425. }
  426. }
  427. // 获取实例配置
  428. const CrashConfig* AutoDmp::getInstanceConfig() {
  429. return instance_ ? &instance_->config_ : nullptr;
  430. }
  431. // 生成测试崩溃
  432. bool AutoDmp::generateTestCrash() const {
  433. if (!initialized_.load()) {
  434. return false;
  435. }
  436. // 触发一个安全的测试信号
  437. raise(SIGUSR1);
  438. return true;
  439. }
  440. // 更新配置
  441. void AutoDmp::updateConfig(const CrashConfig& newConfig) {
  442. config_ = newConfig;
  443. createDirectory(config_.crashDirectory);
  444. }
  445. // 获取支持的信号列表
  446. const int* AutoDmp::getSupportedSignals() {
  447. return handledSignals_;
  448. }
  449. size_t AutoDmp::getSupportedSignalCount() {
  450. return handledSignalCount_;
  451. }