- FIX: timestamp JSON field now uses r->request_time (set at request reception by Apache) instead of apr_time_now() called during log processing - DOC: remove unparsed_uri and fragment fields from architecture.yml data model (these fields are not logged by the module) - DOC: update example_full and timestamp description in architecture.yml Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
430 lines
16 KiB
YAML
430 lines
16 KiB
YAML
project:
|
|
name: mod_reqin_log
|
|
description: >
|
|
Apache HTTPD 2.4 module logging all incoming HTTP requests as JSON lines
|
|
to a Unix domain socket at request reception time (before any request processing).
|
|
language: c
|
|
author:
|
|
name: Jacquin Antoine
|
|
email: rpm@arkel.fr
|
|
target:
|
|
server: apache-httpd
|
|
version: "2.4"
|
|
os: rocky-linux-8+, almalinux-10+
|
|
build:
|
|
toolchain: gcc
|
|
apache_dev: httpd-devel (apxs)
|
|
artifacts:
|
|
- mod_reqin_log.so
|
|
|
|
context:
|
|
architecture:
|
|
pattern: native-apache-module
|
|
scope: global
|
|
mpm_compatibility:
|
|
- prefork
|
|
- worker
|
|
- event
|
|
request_phase:
|
|
hook: post_read_request
|
|
rationale: >
|
|
Log as soon as the HTTP request is fully read to capture input-side data
|
|
(client/server addresses, request line, headers) without waiting for
|
|
application processing.
|
|
logging_scope:
|
|
coverage: all-traffic
|
|
description: >
|
|
Every HTTP request handled by the Apache instance is considered for logging
|
|
when the module is enabled and the Unix socket is configured.
|
|
|
|
module:
|
|
name: mod_reqin_log
|
|
files:
|
|
source:
|
|
- src/mod_reqin_log.c
|
|
- src/mod_reqin_log.h
|
|
packaging:
|
|
- mod_reqin_log.spec
|
|
tests:
|
|
- tests/unit/test_module_real.c
|
|
- tests/unit/test_config_parsing.c
|
|
- tests/unit/test_header_handling.c
|
|
- tests/unit/test_json_serialization.c
|
|
hooks:
|
|
- name: register_hooks
|
|
responsibilities:
|
|
- Register post_read_request hook for logging at request reception.
|
|
- Register child_init hook for per-process state initialization.
|
|
- Initialize per-process server configuration structure.
|
|
- name: child_init
|
|
responsibilities:
|
|
- Initialize module state for each Apache child process.
|
|
- Reset per-process socket state (fd, timers, error flags).
|
|
- Attempt initial non-blocking connection to Unix socket if configured.
|
|
- name: post_read_request
|
|
responsibilities:
|
|
- Retrieve per-process server configuration (thread-safe).
|
|
- Ensure Unix socket is connected (with periodic reconnect).
|
|
- Build JSON log document for the request.
|
|
- Write JSON line to Unix socket using non-blocking I/O.
|
|
- Handle errors by dropping the current log line and rate-limiting
|
|
error reports into Apache error_log.
|
|
thread_safety:
|
|
model: per-process-state
|
|
description: >
|
|
Each Apache child process maintains its own socket state stored in the
|
|
server configuration structure (reqin_log_server_conf_t), so no socket
|
|
state is shared between processes. In worker and event MPMs, where
|
|
multiple threads share a process, socket FD access is guarded by a mutex.
|
|
implementation:
|
|
- State stored via ap_get_module_config(s->module_config)
|
|
- No global variables for socket state
|
|
- Each process has independent: socket_fd, connect timers, error timers
|
|
|
|
data_model:
|
|
json_line:
|
|
description: >
|
|
One JSON object per HTTP request, serialized on a single line and
|
|
terminated by "\n". Uses flat structure with header fields at root level.
|
|
structure: flat
|
|
fields:
|
|
- name: time
|
|
type: string
|
|
format: iso8601-with-timezone
|
|
example: "2026-02-26T11:59:30Z"
|
|
- name: timestamp
|
|
type: integer
|
|
unit: nanoseconds (microsecond precision)
|
|
description: >
|
|
Wall-clock timestamp in microseconds since Unix epoch, expressed
|
|
as nanoseconds for compatibility (multiplied by 1000).
|
|
Uses r->request_time (set by Apache at request reception).
|
|
The nanosecond representation is for API compatibility only.
|
|
example: 1772107170000000000
|
|
- name: scheme
|
|
type: string
|
|
description: Connection scheme evaluated via ap_http_scheme(r).
|
|
example: "https"
|
|
- name: src_ip
|
|
type: string
|
|
example: "192.0.2.10"
|
|
- name: src_port
|
|
type: integer
|
|
example: 45678
|
|
- name: dst_ip
|
|
type: string
|
|
example: "198.51.100.5"
|
|
- name: dst_port
|
|
type: integer
|
|
example: 443
|
|
- name: method
|
|
type: string
|
|
example: "GET"
|
|
- name: path
|
|
type: string
|
|
description: Cleaned and normalized path (r->parsed_uri.path).
|
|
example: "/api/users"
|
|
- name: query
|
|
type: string
|
|
description: >
|
|
Query string component from the parsed URI (r->parsed_uri.query).
|
|
Does not include the leading '?'. Allows detection of payloads like
|
|
SQLi or XSS passed in GET requests.
|
|
example: "id=1%20UNION%20SELECT"
|
|
- name: host
|
|
type: string
|
|
example: "example.com"
|
|
- name: http_version
|
|
type: string
|
|
example: "HTTP/1.1"
|
|
- name: keepalives
|
|
type: integer
|
|
description: >
|
|
Number of requests served over the current connection (r->connection->keepalives).
|
|
If 0, it indicates a newly established TCP connection.
|
|
If > 0, it confirms an active Keep-Alive session.
|
|
example: 2
|
|
- name: content_length
|
|
type: integer
|
|
description: >
|
|
Declared size of the request body (POST payload),
|
|
extracted directly from the 'Content-Length' header.
|
|
example: 1048576
|
|
- name: header_<HeaderName>
|
|
type: string
|
|
description: >
|
|
Flattened header fields at root level. For each configured header <H>,
|
|
a field 'header_<H>' is added directly to the JSON root object.
|
|
Headers are only included if present in the request.
|
|
key_pattern: "header_<configured_header_name>"
|
|
optional: true
|
|
example:
|
|
header_X-Request-Id: "abcd-1234"
|
|
header_User-Agent: "curl/7.70.0"
|
|
example_full: |
|
|
{"time":"2026-02-26T11:59:30Z","timestamp":1772107170000000000,"scheme":"https","src_ip":"192.0.2.10","src_port":45678,"dst_ip":"198.51.100.5","dst_port":443,"method":"GET","path":"/api/users","query":"id=1","host":"example.com","http_version":"HTTP/1.1","keepalives":0,"content_length":0,"header_X-Request-Id":"abcd-1234","header_User-Agent":"curl/7.70.0"}
|
|
|
|
configuration:
|
|
scope: global
|
|
directives:
|
|
- name: JsonSockLogEnabled
|
|
type: flag
|
|
context: server-config
|
|
default: "Off"
|
|
description: >
|
|
Enable or disable mod_reqin_log logging globally. Logging only occurs
|
|
when this directive is On and JsonSockLogSocket is set.
|
|
- name: JsonSockLogSocket
|
|
type: string
|
|
context: server-config
|
|
required_when_enabled: true
|
|
example: "/var/run/logcorrelator/http.socket"
|
|
description: >
|
|
Filesystem path of the Unix domain socket to which JSON log lines
|
|
will be written.
|
|
- name: JsonSockLogHeaders
|
|
type: list
|
|
context: server-config
|
|
value_example: ["X-Request-Id", "X-Trace-Id", "User-Agent"]
|
|
description: >
|
|
List of HTTP header names to log. For each configured header <H>,
|
|
the module adds a JSON field 'header_<H>' at the root level of the
|
|
JSON log entry (flat structure). Order matters for applying the
|
|
JsonSockLogMaxHeaders limit.
|
|
- name: JsonSockLogMaxHeaders
|
|
type: integer
|
|
context: server-config
|
|
default: 10
|
|
min: 0
|
|
description: >
|
|
Maximum number of headers from JsonSockLogHeaders to actually log.
|
|
If more headers are configured, only the first N are considered.
|
|
- name: JsonSockLogMaxHeaderValueLen
|
|
type: integer
|
|
context: server-config
|
|
default: 256
|
|
min: 1
|
|
description: >
|
|
Maximum length in characters for each logged header value.
|
|
Values longer than this limit are truncated before JSON encoding.
|
|
- name: JsonSockLogReconnectInterval
|
|
type: integer
|
|
context: server-config
|
|
default: 10
|
|
unit: seconds
|
|
description: >
|
|
Minimum delay between two connection attempts to the Unix socket after
|
|
a failure. Used to avoid reconnect attempts on every request.
|
|
- name: JsonSockLogErrorReportInterval
|
|
type: integer
|
|
context: server-config
|
|
default: 10
|
|
unit: seconds
|
|
description: >
|
|
Minimum delay between two error messages emitted into Apache error_log
|
|
for repeated I/O or connection errors on the Unix socket.
|
|
|
|
behavior:
|
|
enabling_rules:
|
|
- JsonSockLogEnabled must be On.
|
|
- JsonSockLogSocket must be set to a non-empty path.
|
|
header_handling:
|
|
- Built-in blacklist prevents logging of sensitive headers by default.
|
|
- Blacklisted headers: Authorization, Cookie, Set-Cookie, X-Api-Key,
|
|
X-Auth-Token, Proxy-Authorization, WWW-Authenticate.
|
|
- Blacklisted headers are silently skipped (logged at DEBUG level only).
|
|
- If a configured header is absent in a request, the corresponding
|
|
JSON key is omitted from the log entry.
|
|
- Header values are truncated to JsonSockLogMaxHeaderValueLen characters.
|
|
|
|
io:
|
|
socket:
|
|
type: unix-domain
|
|
protocol: SOCK_DGRAM
|
|
mode: client
|
|
path_source: JsonSockLogSocket
|
|
connection:
|
|
persistence: false
|
|
non_blocking: true
|
|
lifecycle:
|
|
open:
|
|
- Create DGRAM socket and set default destination address via connect()
|
|
during child_init if enabled.
|
|
- Re-attempt addressing after reconnect interval expiry if target
|
|
was previously unavailable.
|
|
failure:
|
|
- On missing target socket (ECONNREFUSED/ENOENT), mark target as unavailable.
|
|
- Do not block the worker process.
|
|
reconnect:
|
|
strategy: time-based
|
|
interval_seconds: config.JsonSockLogReconnectInterval
|
|
trigger: >
|
|
When a request arrives and the last target resolution attempt time is older
|
|
than reconnect interval, a new attempt to address the socket is made.
|
|
write:
|
|
format: json_object
|
|
mode: non-blocking
|
|
atomicity: >
|
|
Full JSON line is sent as a single datagram. Message size must not exceed
|
|
system DGRAM limits or MAX_JSON_SIZE (64KB).
|
|
error_handling:
|
|
on_eagain_or_ewouldblock:
|
|
action: drop-current-log-line
|
|
note: "OS buffer full (receiver is too slow). Do not retry, do not spam error_log."
|
|
on_econnrefused_or_enoent:
|
|
action:
|
|
- close_socket
|
|
- mark_target_unavailable
|
|
- schedule_reconnect
|
|
note: "Target socket closed or deleted by log receiver."
|
|
generic_errors:
|
|
action: drop-current-log-line
|
|
drop_policy:
|
|
description: >
|
|
Logging errors never impact client response. The current log line is
|
|
silently dropped except for throttled error_log reporting.
|
|
|
|
error_handling:
|
|
apache_error_log_reporting:
|
|
enabled: true
|
|
throttle_interval_seconds: config.JsonSockLogErrorReportInterval
|
|
events:
|
|
- type: connect_failure
|
|
message_template: "mod_reqin_log: Unix socket connect failed: [errno_detail]"
|
|
- type: write_failure
|
|
message_template: "mod_reqin_log: Unix socket write failed: [errno_detail]"
|
|
fatal_conditions:
|
|
- description: >
|
|
Misconfiguration (JsonSockLogEnabled On but missing JsonSockLogSocket)
|
|
should be reported at startup as a configuration error.
|
|
- description: >
|
|
Any internal JSON-encoding failure should be treated as non-fatal
|
|
(drop current log and optionally emit a throttled error_log entry).
|
|
|
|
constraints:
|
|
performance:
|
|
objectives:
|
|
- Logging overhead per request should be minimal and non-blocking.
|
|
- No dynamic allocations in hot path beyond what is strictly necessary
|
|
(prefer APR pools where possible).
|
|
design_choices:
|
|
- Single JSON serialization pass per request.
|
|
- Use non-blocking I/O to avoid stalling worker threads/processes.
|
|
- Avoid reconnect attempts on every request via time-based backoff.
|
|
security:
|
|
notes:
|
|
- Module includes built-in blacklist of sensitive headers to prevent
|
|
accidental credential leakage (Authorization, Cookie, X-Api-Key, etc.).
|
|
- Socket permissions default to 0o660 (owner/group only) for security.
|
|
- Recommended socket path: /var/run/logcorrelator/http.socket (not /tmp).
|
|
- Use environment variable MOD_REQINLOG_SOCKET to configure socket path.
|
|
- Module does not anonymize IPs (data protection is delegated to configuration).
|
|
- No requests are rejected due to logging failures.
|
|
hardening:
|
|
- Socket path length validated against system limit (108 bytes).
|
|
- JSON log line size limited to 64KB to prevent memory exhaustion DoS.
|
|
- NULL pointer checks on all connection/request fields.
|
|
- Thread-safe socket FD access via mutex (worker/event MPMs).
|
|
- Error logging reduced to prevent information disclosure.
|
|
robustness:
|
|
requirements:
|
|
- Logging failures must not crash Apache worker processes.
|
|
- Module must behave correctly under high traffic, socket disappearance,
|
|
and repeated connect failures.
|
|
|
|
testing_strategy:
|
|
unit_tests:
|
|
framework: cmocka
|
|
location: tests/unit/test_module_real.c
|
|
focus:
|
|
- JSON serialization with header truncation and header count limits.
|
|
- Dynamic buffer operations (dynbuf_t) with resize handling.
|
|
- ISO8601 timestamp formatting.
|
|
- Header value truncation to JsonSockLogMaxHeaderValueLen.
|
|
- Control character escaping in JSON strings.
|
|
execution:
|
|
- docker build -f Dockerfile.tests .
|
|
- docker run --rm <image> ctest --output-on-failure
|
|
|
|
ci_strategy:
|
|
description: >
|
|
All builds, tests and packaging are executed inside Docker containers
|
|
using GitLab CI with Docker-in-Docker (dind). No RPM build or test is
|
|
allowed on bare-metal or shared CI runners.
|
|
tools:
|
|
orchestrator: GitLab CI
|
|
container_engine: docker
|
|
dind: true
|
|
workflow_file: .gitlab-ci.yml
|
|
constraints:
|
|
no_host_builds: true
|
|
description: >
|
|
It is forbidden to run rpmbuild, unit tests or package verification
|
|
directly on the CI host. All steps MUST run inside Docker containers
|
|
defined by project Dockerfiles.
|
|
rpm_strategy: >
|
|
Separate RPMs are built for each major RHEL/CentOS/Rocky/AlmaLinux version
|
|
(el8, el9, el10) due to glibc and httpd-devel incompatibilities across
|
|
major versions. A single RPM cannot work across all versions.
|
|
RPM packages are built using rpmbuild with mod_reqin_log.spec file.
|
|
rpm_changelog:
|
|
policy: mandatory
|
|
description: >
|
|
For every version or release bump of the RPM (Version or Release tag
|
|
in mod_reqin_log.spec), the %changelog section MUST be updated with:
|
|
- date, packager, new version-release
|
|
- brief description of the changes.
|
|
validation:
|
|
- A CI job MUST fail if Version/Release changed and no new %changelog
|
|
entry is present.
|
|
- Changelog is the single source of truth for packaged changes.
|
|
stages:
|
|
- name: validate-spec
|
|
description: >
|
|
Ensure that any change to Version/Release in mod_reqin_log.spec
|
|
is accompanied by a new %changelog entry.
|
|
containerized: true
|
|
dockerfile: Dockerfile.tools
|
|
checks:
|
|
- script: scripts/check_spec_changelog.sh mod_reqin_log.spec
|
|
fail_on_missing_changelog: true
|
|
- name: build
|
|
description: >
|
|
Build all RPM packages (el8, el9, el10) using Dockerfile.package
|
|
with multi-stage build, entirely inside a Docker container.
|
|
dockerfile: Dockerfile.package
|
|
containerized: true
|
|
artifacts:
|
|
- dist/rpm/*.el8.*.rpm
|
|
- dist/rpm/*.el9.*.rpm
|
|
- dist/rpm/*.el10.*.rpm
|
|
- name: test
|
|
description: >
|
|
Run unit tests (C with cmocka) inside Docker containers, using
|
|
Dockerfile.tests as the only execution environment.
|
|
dockerfile: Dockerfile.tests
|
|
containerized: true
|
|
execution: ctest --output-on-failure
|
|
- name: verify
|
|
description: >
|
|
Verify RPM installation and module loading on each target distribution
|
|
by running containers for each OS.
|
|
containerized: true
|
|
jobs:
|
|
- name: verify-rpm-el8
|
|
image: rockylinux:8
|
|
steps:
|
|
- rpm -qi mod_reqin_log
|
|
- httpd -M | grep reqin_log
|
|
- name: verify-rpm-el9
|
|
image: rockylinux:9
|
|
steps:
|
|
- rpm -qi mod_reqin_log
|
|
- httpd -M | grep reqin_log
|
|
- name: verify-rpm-el10
|
|
image: almalinux:10
|
|
steps:
|
|
- rpm -qi mod_reqin_log
|
|
- httpd -M | grep reqin_log
|
|
|