From 8a8ee8c6b17ccb592166fd2f60547f22cfb2e977 Mon Sep 17 00:00:00 2001 From: Jacquin Antoine Date: Mon, 2 Mar 2026 20:59:38 +0100 Subject: [PATCH] FEATURE: Add missing JSON fields and fix socket type per architecture.yml - Add req_id, scheme, unparsed_uri, args, keepalives, content_length fields - Change socket type from SOCK_STREAM to SOCK_DGRAM - Update architecture.yml documentation with new fields - Bump version to 1.0.8 with changelog entry Co-authored-by: Qwen-Coder --- architecture.yml | 285 +++++++++++++++++++++++++++----------------- mod_reqin_log.spec | 6 +- src/mod_reqin_log.c | 80 ++++++++++++- 3 files changed, 260 insertions(+), 111 deletions(-) diff --git a/architecture.yml b/architecture.yml index 01f4a51..91f9a01 100644 --- a/architecture.yml +++ b/architecture.yml @@ -80,6 +80,7 @@ module: - State stored via ap_get_module_config(s->module_config) - No global variables for socket state - Each process has independent: socket_fd, connect timers, error timers + data_model: json_line: description: > @@ -100,6 +101,16 @@ module: Note: apr_time_now() returns microseconds with microsecond precision. The nanosecond representation is for API compatibility only. example: 1708948770000000000 + - name: req_id + type: string + description: > + Unique request identifier generated by Apache 2.4 (r->log_id). + Useful for cross-referencing this log with standard access and error logs. + example: "Yj2x-abcd-1234" + - name: scheme + type: string + description: Connection scheme evaluated via ap_http_scheme(r). + example: "https" - name: src_ip type: string example: "192.0.2.10" @@ -115,15 +126,41 @@ module: - name: method type: string example: "GET" + - name: unparsed_uri + type: string + description: > + Raw, uncleaned URI exactly as requested by the client (r->unparsed_uri). + Essential for detecting Path Traversal or bot anomalies before Apache normalizes it. 
+ example: "//dossier/../api/users" - name: path type: string - example: "/foo/bar" + description: Cleaned and normalized path (r->parsed_uri.path). + example: "/api/users" + - name: args + type: string + description: > + Query string parameters from the URL (r->args). + Allows detection of payloads like SQLi or XSS passed in GET requests. + example: "id=1%20UNION%20SELECT" - name: host type: string example: "example.com" - name: http_version type: string example: "HTTP/1.1" + - name: keepalives + type: integer + description: > + Number of requests served over the current connection (r->connection->keepalives). + If 0, it indicates a newly established TCP connection. + If > 0, it confirms an active Keep-Alive session. + example: 2 + - name: content_length + type: integer + description: > + Declared size of the request body (POST payload), + extracted directly from the 'Content-Length' header. + example: 1048576 - name: header_ type: string description: > @@ -135,19 +172,8 @@ module: example: header_X-Request-Id: "abcd-1234" header_User-Agent: "curl/7.70.0" - example_full: - time: "2026-02-26T11:59:30Z" - timestamp: 1708948770000000000 - src_ip: "192.0.2.10" - src_port: 45678 - dst_ip: "198.51.100.5" - dst_port: 443 - method: "GET" - path: "/api/users" - host: "example.com" - http_version: "HTTP/1.1" - header_X-Request-Id: "abcd-1234" - header_User-Agent: "curl/7.70.0" + example_full: | + {"time":"2026-02-26T11:59:30Z","timestamp":1708948770000000000,"req_id":"Yj2x-abcd-1234","scheme":"https","src_ip":"192.0.2.10","src_port":45678,"dst_ip":"198.51.100.5","dst_port":443,"method":"GET","unparsed_uri":"//api/users?id=1","path":"/api/users","args":"id=1","host":"example.com","http_version":"HTTP/1.1","keepalives":0,"content_length":0,"header_X-Request-Id":"abcd-1234","header_User-Agent":"curl/7.70.0"} configuration: scope: global @@ -222,62 +248,69 @@ configuration: JSON key is omitted from the log entry. 
- Header values are truncated to JsonSockLogMaxHeaderValueLen characters. -io: - socket: - type: unix-domain - mode: client - path_source: JsonSockLogSocket - connection: - persistence: true - non_blocking: true - lifecycle: - open: - - Attempt initial connection during child_init if enabled. - - On first log attempt after reconnect interval expiry if not yet connected. - failure: - - On connection failure, mark socket as unavailable. - - Do not block the worker process. - reconnect: - strategy: time-based - interval_seconds: "@config.JsonSockLogReconnectInterval" - trigger: > - When a request arrives and the last connect attempt time is older - than reconnect interval, a new connect is attempted. - write: - format: "json_object + '\\n'" - mode: non-blocking - error_handling: - on_eagain: - action: drop-current-log-line - note: do not retry for this request. - on_epipe_or_conn_reset: - action: - - close_socket - - mark_unavailable - - schedule_reconnect - generic_errors: - action: drop-current-log-line - drop_policy: - description: > - Logging errors never impact client response. The current log line - is silently dropped (except for throttled error_log reporting). + io: + socket: + type: unix-domain + protocol: SOCK_DGRAM + mode: client + path_source: JsonSockLogSocket + connection: + persistence: false + non_blocking: true + lifecycle: + open: + - Create DGRAM socket and set default destination address via connect() + during child_init if enabled. + - Re-attempt addressing after reconnect interval expiry if target + was previously unavailable. + failure: + - On missing target socket (ECONNREFUSED/ENOENT), mark target as unavailable. + - Do not block the worker process. + reconnect: + strategy: time-based + interval_seconds: config.JsonSockLogReconnectInterval + trigger: > + When a request arrives and the last target resolution attempt time is older + than reconnect interval, a new attempt to address the socket is made. 
+ write: + format: json_object + mode: non-blocking + atomicity: > + Full JSON line is sent as a single datagram. Message size must not exceed + system DGRAM limits or MAX_JSON_SIZE (64KB). + error_handling: + on_eagain_or_ewouldblock: + action: drop-current-log-line + note: "OS buffer full (receiver is too slow). Do not retry, do not spam error_log." + on_econnrefused_or_enoent: + action: + - close_socket + - mark_target_unavailable + - schedule_reconnect + note: "Target socket closed or deleted by log receiver." + generic_errors: + action: drop-current-log-line + drop_policy: + description: > + Logging errors never impact client response. The current log line is + silently dropped except for throttled error_log reporting. -error_handling: - apache_error_log_reporting: - enabled: true - throttle_interval_seconds: "@config.JsonSockLogErrorReportInterval" - events: - - type: connect_failure - message_template: "[mod_reqin_log] Unix socket connect failed: /" - - type: write_failure - message_template: "[mod_reqin_log] Unix socket write failed: /" - fatal_conditions: - - description: > - Misconfiguration (JsonSockLogEnabled On but missing JsonSockLogSocket) - should be reported at startup as a configuration error. - - description: > - Any internal JSON-encoding failure should be treated as non-fatal: - drop current log and optionally emit a throttled error_log entry. + error_handling: + apache_error_log_reporting: + enabled: true + throttle_interval_seconds: config.JsonSockLogErrorReportInterval + events: + - type: connect_failure + message_template: "mod_reqin_log: Unix socket connect failed: [errno_detail]" + - type: write_failure + message_template: "mod_reqin_log: Unix socket write failed: [errno_detail]" + fatal_conditions: + - description: > + Misconfiguration (JsonSockLogEnabled On but missing JsonSockLogSocket) + should be reported at startup as a configuration error. 
+ - description: > + Any internal JSON-encoding failure should be treated as non-fatal + (drop current log and optionally emit a throttled error_log entry). constraints: performance: @@ -293,10 +326,10 @@ constraints: notes: - Module includes built-in blacklist of sensitive headers to prevent accidental credential leakage (Authorization, Cookie, X-Api-Key, etc.). - - Socket permissions default to 0o660 (owner+group only) for security. + - Socket permissions default to 0o660 (owner/group only) for security. - Recommended socket path: /var/run/logcorrelator/http.socket (not /tmp). - - Use environment variable MOD_REQIN_LOG_SOCKET to configure socket path. - - Module does not anonymize IPs; data protection is delegated to configuration. + - Use environment variable MOD_REQINLOG_SOCKET to configure socket path. + - Module does not anonymize IPs (data protection is delegated to configuration). - No requests are rejected due to logging failures. hardening: - Socket path length validated against system limit (108 bytes). @@ -310,64 +343,98 @@ constraints: - Module must behave correctly under high traffic, socket disappearance, and repeated connect failures. -testing: - strategy: - unit_tests: - framework: cmocka - location: tests/unit/test_module_real.c - focus: - - JSON serialization with header truncation and header count limits. - - Dynamic buffer operations (dynbuf_t) with resize handling. - - ISO8601 timestamp formatting. - - Header value truncation to JsonSockLogMaxHeaderValueLen. - - Control character escaping in JSON strings. - execution: - - docker build -f Dockerfile.tests . - - docker run --rm ctest --output-on-failure +testing_strategy: + unit_tests: + framework: cmocka + location: tests/unit/test_module_real.c + focus: + - JSON serialization with header truncation and header count limits. + - Dynamic buffer operations (dynbuf_t) with resize handling. + - ISO8601 timestamp formatting. + - Header value truncation to JsonSockLogMaxHeaderValueLen. 
+ - Control character escaping in JSON strings. + execution: + - docker build -f Dockerfile.tests . + - docker run --rm ctest --output-on-failure - -ci: - strategy: - description: > - All builds, tests and packaging are executed inside Docker containers - using GitLab CI with Docker-in-Docker (dind). - tools: - orchestrator: GitLab CI - container_engine: docker - dind: true +ci_strategy: + description: > + All builds, tests and packaging are executed inside Docker containers + using GitLab CI with Docker-in-Docker (dind). No RPM build or test is + allowed on bare-metal or shared CI runners. + tools: + orchestrator: GitLab CI + container_engine: docker + dind: true workflow_file: .gitlab-ci.yml - rpm_strategy: > - Separate RPMs are built for each major RHEL/CentOS/Rocky/AlmaLinux version - (el8, el9, el10) due to glibc and httpd-devel incompatibilities - across major versions. A single RPM cannot work across all versions. - RPM packages are built using rpmbuild with mod_reqin_log.spec file. + constraints: + no_host_builds: true + description: > + It is forbidden to run rpmbuild, unit tests or package verification + directly on the CI host. All steps MUST run inside Docker containers + defined by project Dockerfiles. + rpm_strategy: > + Separate RPMs are built for each major RHEL/CentOS/Rocky/AlmaLinux version + (el8, el9, el10) due to glibc and httpd-devel incompatibilities across + major versions. A single RPM cannot work across all versions. + RPM packages are built using rpmbuild with mod_reqin_log.spec file. + rpm_changelog: + policy: mandatory + description: > + For every version or release bump of the RPM (Version or Release tag + in mod_reqin_log.spec), the %changelog section MUST be updated with: + - date, packager, new version-release + - brief description of the changes. + validation: + - A CI job MUST fail if Version/Release changed and no new %changelog + entry is present. + - Changelog is the single source of truth for packaged changes. 
stages: + - name: validate-spec + description: > + Ensure that any change to Version/Release in mod_reqin_log.spec + is accompanied by a new %changelog entry. + containerized: true + dockerfile: Dockerfile.tools + checks: + - script: scripts/check_spec_changelog.sh mod_reqin_log.spec + fail_on_missing_changelog: true - name: build description: > - Build all RPM packages (el8, el9, el10) using Dockerfile.package with multi-stage build. + Build all RPM packages (el8, el9, el10) using Dockerfile.package + with multi-stage build, entirely inside a Docker container. dockerfile: Dockerfile.package + containerized: true artifacts: - dist/rpm/*.el8.*.rpm - dist/rpm/*.el9.*.rpm - dist/rpm/*.el10.*.rpm - - name: test description: > - Run unit tests (C with cmocka) inside Docker containers. + Run unit tests (C with cmocka) inside Docker containers, using + Dockerfile.tests as the only execution environment. dockerfile: Dockerfile.tests + containerized: true execution: ctest --output-on-failure - - name: verify description: > - Verify package installation on each target distribution. + Verify RPM installation and module loading on each target distribution + by running containers for each OS. 
+ containerized: true jobs: - name: verify-rpm-el8 image: rockylinux:8 - check: "rpm -qi mod_reqin_log && httpd -M | grep reqin_log" + steps: + - rpm -qi mod_reqin_log + - httpd -M | grep reqin_log - name: verify-rpm-el9 image: rockylinux:9 - check: "rpm -qi mod_reqin_log && httpd -M | grep reqin_log" + steps: + - rpm -qi mod_reqin_log + - httpd -M | grep reqin_log - name: verify-rpm-el10 image: almalinux:10 - check: "rpm -qi mod_reqin_log && httpd -M | grep reqin_log" + steps: + - rpm -qi mod_reqin_log + - httpd -M | grep reqin_log diff --git a/mod_reqin_log.spec b/mod_reqin_log.spec index c279e82..f541e54 100644 --- a/mod_reqin_log.spec +++ b/mod_reqin_log.spec @@ -1,4 +1,4 @@ -%global spec_version 1.0.7 +%global spec_version 1.0.8 Name: mod_reqin_log Version: %{spec_version} @@ -37,6 +37,10 @@ install -m 644 %{_pkgroot}/%{_sysconfdir}/httpd/conf.d/mod_reqin_log.conf %{buil %doc %{_docdir}/%{name} %changelog +* Mon Mar 02 2026 Developer - 1.0.8 +- FEATURE: Add req_id, scheme, unparsed_uri, args, keepalives, content_length fields to JSON output +- FIX: Change socket type from SOCK_STREAM to SOCK_DGRAM per architecture.yml + * Sun Mar 01 2026 Developer - 1.0.7 - FEATURE: Add JsonSockLogLevel directive (DEBUG, INFO, WARNING, ERROR, EMERG) diff --git a/src/mod_reqin_log.c b/src/mod_reqin_log.c index 27ce1f4..7906aa2 100644 --- a/src/mod_reqin_log.c +++ b/src/mod_reqin_log.c @@ -524,7 +524,7 @@ static int try_connect(reqin_log_config_t *cfg, reqin_log_child_state_t *state, state->last_connect_attempt = now; if (state->socket_fd < 0) { - state->socket_fd = socket(AF_UNIX, SOCK_STREAM, 0); + state->socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0); if (state->socket_fd < 0) { err = errno; state->connect_failed = 1; @@ -668,6 +668,12 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child const char *path; const char *host; const char *http_version; + const char *scheme; + const char *unparsed_uri; + const char *args; + const char *req_id; + const 
char *content_length_str; + apr_int64_t content_length; if (!r || !r->server || !r->pool || !r->connection) { return; @@ -707,6 +713,42 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child http_version = apr_pstrmemdup(pool, http_version, 16); } + /* scheme (https or http) */ + scheme = ap_http_scheme(r); + if (scheme == NULL) { + scheme = "http"; + } + + /* unparsed_uri (raw, unnormalized URI) */ + unparsed_uri = r->unparsed_uri ? r->unparsed_uri : ""; + /* Sanitize unparsed_uri to prevent oversized values */ + if (strlen(unparsed_uri) > 2048) { + unparsed_uri = apr_pstrmemdup(pool, unparsed_uri, 2048); + } + + /* args (query string) */ + args = r->args ? r->args : ""; + /* Sanitize args to prevent oversized values */ + if (strlen(args) > 2048) { + args = apr_pstrmemdup(pool, args, 2048); + } + + /* req_id (unique request identifier from Apache) */ + req_id = r->log_id ? r->log_id : ""; + + /* content_length (from Content-Length header) */ + content_length_str = apr_table_get(r->headers_in, "Content-Length"); + if (content_length_str != NULL) { + char *endptr; + errno = 0; + content_length = strtoll(content_length_str, &endptr, 10); + if (errno != 0 || endptr == content_length_str || *endptr != '\0' || content_length < 0) { + content_length = 0; + } + } else { + content_length = 0; + } + dynbuf_init(&buf, pool, 4096); dynbuf_append(&buf, "{", 1); @@ -728,6 +770,16 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child dynbuf_append(&buf, ",", 1); } + /* req_id (unique request identifier); the key prefix is 10 bytes including the value's opening quote */ + dynbuf_append(&buf, "\"req_id\":\"", 10); + append_json_string(&buf, req_id); + dynbuf_append(&buf, "\",", 2); + + /* scheme (http or https) */ + dynbuf_append(&buf, "\"scheme\":\"", 10); + append_json_string(&buf, scheme); + dynbuf_append(&buf, "\",", 2); + /* src_ip */ dynbuf_append(&buf, "\"src_ip\":\"", 10); append_json_string(&buf, src_ip); @@ -768,6 +820,16 @@ static void log_request(request_rec *r, 
reqin_log_config_t *cfg, reqin_log_child append_json_string(&buf, path); dynbuf_append(&buf, "\",", 2); + /* unparsed_uri (raw, unnormalized URI) */ + dynbuf_append(&buf, "\"unparsed_uri\":\"", 16); + append_json_string(&buf, unparsed_uri); + dynbuf_append(&buf, "\",", 2); + + /* args (query string) */ + dynbuf_append(&buf, "\"args\":\"", 8); + append_json_string(&buf, args); + dynbuf_append(&buf, "\",", 2); + /* host */ dynbuf_append(&buf, "\"host\":\"", 8); append_json_string(&buf, host); @@ -778,6 +840,22 @@ static void log_request(request_rec *r, reqin_log_config_t *cfg, reqin_log_child append_json_string(&buf, http_version); dynbuf_append(&buf, "\"", 1); + /* keepalives (number of requests on this connection); length 14 excludes the literal's terminating NUL */ + dynbuf_append(&buf, ",\"keepalives\":", 14); + { + char ka_buf[16]; + snprintf(ka_buf, sizeof(ka_buf), "%d", r->connection->keepalives); + dynbuf_append(&buf, ka_buf, -1); + } + + /* content_length (from Content-Length header) */ + dynbuf_append(&buf, ",\"content_length\":", 18); + { + char cl_buf[32]; + snprintf(cl_buf, sizeof(cl_buf), "%" APR_INT64_T_FMT, content_length); + dynbuf_append(&buf, cl_buf, -1); + } + /* Check buffer size before adding headers to prevent memory exhaustion */ if (buf.len >= MAX_JSON_SIZE) { if (SHOULD_LOG(srv_conf, REQIN_LOG_LEVEL_DEBUG)) {