From 7b59e11f79207a35117e77dd401f4ab7ed1485a2 Mon Sep 17 00:00:00 2001
From: Jacek <jacek@clerk.dev>
Date: Thu, 4 Jun 2026 21:43:46 -0500
Subject: [PATCH 1/2] ci(e2e): harden staging e2e against deterministic and
 flaky reds

The staging e2e "generic" leg was red on ~100% of runs because a few
independent failures sat behind an all-or-nothing gate:

- whatsapp-phone-code: the WhatsApp channel is not enabled on the staging
  instance, so the button never renders and the suite times out every run.
  It also bypasses the isStagingReady graceful-skip, so skip it explicitly
  on staging until the channel is provisioned.
- custom-pages "survives a parent rerender": validates an unreleased
  @clerk/react fix (#8604), but the staging leg installs published @latest,
  so it is deterministically red until release. Skip when E2E_SDK_SOURCE=latest;
  PR CI (ref builds) still covers it.
- concurrency was keyed on ref (effectively always "main") with
  cancel-in-progress, so each new staging deploy cancelled the in-flight run
  and no commit could report a status. Key on the clerk_go commit instead.
- raise the job timeout above the 25-minute test step so the job cap no
  longer kills runs mid-suite.
- emit and upload a JSON Playwright report in CI so the report job can
  classify failures (flaky vs failed, infra vs regression) later.
---
 .changeset/staging-e2e-resilience-p0.md       |  2 ++
 .github/workflows/e2e-staging.yml             | 21 +++++++++++++++++--
 integration/playwright.config.ts              |  5 +++++
 integration/tests/custom-pages.test.ts        | 12 +++++++++++
 integration/tests/whatsapp-phone-code.test.ts |  9 ++++++++
 5 files changed, 47 insertions(+), 2 deletions(-)
 create mode 100644 .changeset/staging-e2e-resilience-p0.md

diff --git a/.changeset/staging-e2e-resilience-p0.md b/.changeset/staging-e2e-resilience-p0.md
new file mode 100644
index 00000000000..a845151cc84
--- /dev/null
+++ b/.changeset/staging-e2e-resilience-p0.md
@@ -0,0 +1,2 @@
+---
+---
diff --git a/.github/workflows/e2e-staging.yml b/.github/workflows/e2e-staging.yml
index bb3d3772505..7ae9daca3c9 100644
--- a/.github/workflows/e2e-staging.yml
+++ b/.github/workflows/e2e-staging.yml
@@ -33,7 +33,11 @@ permissions:
   actions: write
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.inputs.ref || github.event.client_payload.ref || 'main' }}
+  # Key on the clerk_go commit being validated rather than the (effectively always "main")
+  # ref, so distinct staging deploys no longer cancel each other and each commit can report
+  # its own result. Duplicate dispatches for the SAME commit still de-dupe; manual dispatches
+  # without a commit SHA fall back to the unique run_id and are never cancelled.
+  group: ${{ github.workflow }}-${{ github.event.inputs.clerk-go-commit-sha || github.event.client_payload.clerk-go-commit-sha || github.run_id }}
   cancel-in-progress: true
 
 jobs:
@@ -118,7 +122,9 @@ jobs:
     defaults:
       run:
         shell: bash
-    timeout-minutes: ${{ vars.TIMEOUT_MINUTES_LONG && fromJSON(vars.TIMEOUT_MINUTES_LONG) || 15 }}
+    # Must stay above the 25-minute "Run Integration Tests" step budget below, otherwise the
+    # job-level cap kills the run mid-suite and reports a misleading timeout/cancellation.
+    timeout-minutes: ${{ vars.TIMEOUT_MINUTES_LONG && fromJSON(vars.TIMEOUT_MINUTES_LONG) || 30 }}
 
     strategy:
       fail-fast: false
@@ -293,6 +299,17 @@ jobs:
           path: test-results
           retention-days: 1
 
+      # Always upload the machine-readable report (even on success) so downstream
+      # reporting/classification can run regardless of the leg's pass/fail outcome.
+      - name: Upload Playwright JSON report
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        with:
+          name: playwright-report-${{ github.run_id }}-${{ github.run_attempt }}-${{ steps.inputs.outputs.artifact-suffix }}
+          path: playwright-report/results.json
+          if-no-files-found: ignore
+          retention-days: 3
+
   report:
     name: Report Results
     needs: [integration-tests]
diff --git a/integration/playwright.config.ts b/integration/playwright.config.ts
index fbcd35fa2a9..911172a94a6 100644
--- a/integration/playwright.config.ts
+++ b/integration/playwright.config.ts
@@ -27,6 +27,11 @@ export const common: PlaywrightTestConfig = {
 export default defineConfig({
   ...common,
 
+  // Emit a machine-readable report in CI so the staging workflow's report job can
+  // classify failures (flaky vs failed, infra vs regression) instead of reading a
+  // single pass/fail boolean. Local runs keep the default human-readable list output.
+  reporter: process.env.CI ? [['list'], ['json', { outputFile: 'playwright-report/results.json' }]] : 'list',
+
   projects: [
     {
       name: 'setup',
diff --git a/integration/tests/custom-pages.test.ts b/integration/tests/custom-pages.test.ts
index 0636e964c71..5f7304f615a 100644
--- a/integration/tests/custom-pages.test.ts
+++ b/integration/tests/custom-pages.test.ts
@@ -1,3 +1,4 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
 import { expect, test } from '@playwright/test';
 
 import type { Application } from '../models/application';
@@ -172,6 +173,17 @@ testAgainstRunningApps({ withPattern: ['react.vite.withEmailCodes'] })(
     });
 
     test('custom profile page survives a parent rerender without remounting', async ({ page, context }) => {
+      // Validates the @clerk/react fix from #8604 (custom pages must not remount on a
+      // parent rerender). The staging leg installs published @latest packages
+      // (E2E_SDK_SOURCE=latest), which do not yet contain the fix, so this is
+      // deterministically red there until the next @clerk/react release. PR CI builds
+      // the SDK from the branch and still exercises this test.
+      // TODO: remove this skip once @clerk/react including #8604 is published to npm.
+      test.skip(
+        process.env.E2E_SDK_SOURCE === 'latest',
+        'validates the unreleased @clerk/react fix (#8604); covered by ref-built CI',
+      );
+
       const u = createTestUtils({ app, page, context });
       await u.po.signIn.goTo();
       await u.po.signIn.waitForMounted();
diff --git a/integration/tests/whatsapp-phone-code.test.ts b/integration/tests/whatsapp-phone-code.test.ts
index 8f5c310f82f..10e240438ca 100644
--- a/integration/tests/whatsapp-phone-code.test.ts
+++ b/integration/tests/whatsapp-phone-code.test.ts
@@ -1,3 +1,4 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
 import { expect, test } from '@playwright/test';
 
 import type { Application } from '../models/application';
@@ -6,6 +7,14 @@ import type { FakeUser } from '../testUtils';
 import { createTestUtils } from '../testUtils';
 
 test.describe('sign up and sign in with WhatsApp phone code @generic', () => {
+  // The WhatsApp alternate phone-code channel is not provisioned on the staging
+  // instance, so the WhatsApp sign-up button never renders there and every test in
+  // this suite times out deterministically (no amount of retrying helps). Unlike the
+  // long-running-app suites, this test builds its own app via `app.withEnv(...)` and
+  // therefore bypasses the `isStagingReady` graceful-skip. Skip it explicitly on
+  // staging until the channel is enabled on the staging mirror.
+  test.skip(process.env.E2E_STAGING === '1', 'WhatsApp channel is not enabled on the staging instance');
+
   const configs = [appConfigs.next.appRouter];
 
   configs.forEach(config => {

From 0eb5396dc9614ad3bf5fd6a1563a2f5aec93833b Mon Sep 17 00:00:00 2001
From: Jacek <jacek@clerk.dev>
Date: Thu, 4 Jun 2026 21:59:57 -0500
Subject: [PATCH 2/2] ci(e2e): gate staging e2e on critical staging-instance
 config drift

validate-staging-instances.mjs already diffs prod vs staging /v1/environment
but every exit path returned 0, so detected drift blocked nothing and the
job was not a dependency of the test matrix. A drifted staging mirror (e.g. a
missing phone_number WhatsApp channel) therefore surfaced only as opaque test
timeouts 200 tests deep.

Add a tight CRITICAL_PATHS allowlist (attribute enabled toggles,
phone_number.channels, auth factors/strategies, social enable/disable,
password settings) and an ACCEPTED_DRIFT escape hatch so known gaps don't
block while new drift does. In strict mode the script exits non-zero on a
blocking mismatch; fetch failures and cosmetic drift never fail the build.

Wire integration-tests to need validate-instances, and drive strictness from
the STAGING_VALIDATE_STRICT repo variable (default report-only). So this is a
no-op until the team opts in: it logs blocking drift and the proposed gate
without failing anything. Flip the variable to make it enforce.
---
 .changeset/staging-e2e-validate-gate.md     |   2 +
 .github/workflows/e2e-staging.yml           |  12 +-
 scripts/validate-staging-instances.mjs      |  96 +++++++++++-
 scripts/validate-staging-instances.test.mjs | 165 ++++++++++++++++++++
 4 files changed, 271 insertions(+), 4 deletions(-)
 create mode 100644 .changeset/staging-e2e-validate-gate.md

diff --git a/.changeset/staging-e2e-validate-gate.md b/.changeset/staging-e2e-validate-gate.md
new file mode 100644
index 00000000000..a845151cc84
--- /dev/null
+++ b/.changeset/staging-e2e-validate-gate.md
@@ -0,0 +1,2 @@
+---
+---
diff --git a/.github/workflows/e2e-staging.yml b/.github/workflows/e2e-staging.yml
index 7ae9daca3c9..22a7863c31c 100644
--- a/.github/workflows/e2e-staging.yml
+++ b/.github/workflows/e2e-staging.yml
@@ -111,13 +111,21 @@ jobs:
       - name: Validate staging instance settings
         run: node scripts/validate-staging-instances.mjs
         env:
+          # Report-only unless the `STAGING_VALIDATE_STRICT` repo variable is set to "true"/"1".
+          # When strict, a mismatch on critical config (see CRITICAL_PATHS in the script) fails
+          # this job, which gates the integration-tests job below so the run stops fast with a
+          # clear diagnostic instead of letting a drifted staging mirror produce opaque failures.
+          STAGING_VALIDATE_STRICT: ${{ vars.STAGING_VALIDATE_STRICT }}
           INTEGRATION_INSTANCE_KEYS: ${{ secrets.INTEGRATION_INSTANCE_KEYS }}
           INTEGRATION_STAGING_INSTANCE_KEYS: ${{ secrets.INTEGRATION_STAGING_INSTANCE_KEYS }}
 
   integration-tests:
     name: Integration Tests (${{ matrix.test-name }}, ${{ matrix.test-project }})
-    needs: [permissions-check]
-    if: ${{ always() && (needs.permissions-check.result == 'success' || needs.permissions-check.result == 'skipped') }}
+    needs: [permissions-check, validate-instances]
+    # Run when permissions passed/skipped AND the staging-config validation did not block.
+    # validate-instances only fails when strict gating is enabled and critical config drifted,
+    # so by default (report-only) this is a no-op and tests run as before.
+    if: ${{ always() && (needs.permissions-check.result == 'success' || needs.permissions-check.result == 'skipped') && (needs.validate-instances.result == 'success' || needs.validate-instances.result == 'skipped') }}
     runs-on: 'blacksmith-8vcpu-ubuntu-2204'
     defaults:
       run:
diff --git a/scripts/validate-staging-instances.mjs b/scripts/validate-staging-instances.mjs
index 5afb0178617..25a519c349b 100644
--- a/scripts/validate-staging-instances.mjs
+++ b/scripts/validate-staging-instances.mjs
@@ -35,6 +35,67 @@ function isIgnored(path) {
   return IGNORED_PATHS.some(pattern => pattern.test(path));
 }
 
+// ── Gating policy ────────────────────────────────────────────────────────────
+
+/**
+ * Functional configuration that must match between a production instance and its
+ * staging mirror for the e2e suite to be meaningful. A mismatch on any of these
+ * paths fails the gate in strict mode; every other difference is reported as
+ * informational drift and never blocks. Keep this list tight: only config that
+ * actually changes which auth flows are possible belongs here.
+ */
+const CRITICAL_PATHS = [
+  // An auth attribute (email_address, phone_number, username, ...) toggled on/off.
+  /^user_settings\.attributes\.[^.]+\.enabled$/,
+  // The phone-code channel set (sms / whatsapp), which drives alternate-channel UIs.
+  /^user_settings\.attributes\.phone_number\.channels$/,
+  // Enabled auth strategies / factors for an attribute.
+  /^user_settings\.attributes\.[^.]+\.(first_factors|second_factors|verifications)$/,
+  // A social provider enabled/disabled, or wholly added/removed.
+  /^user_settings\.social\.[^.]+(\.enabled)?$/,
+  // Password policy, which affects password sign-in / sign-up flows.
+  /^user_settings\.password_settings\..+/,
+];
+
+/**
+ * Known, intentionally-tolerated critical drift that should NOT fail the gate, so
+ * that NEW drift still does. Each entry needs a `path` (string or RegExp), an
+ * optional `instance` name to scope it, and a `reason` (ideally a tracking link).
+ * Prefer fixing the staging instance over adding entries here.
+ */
+const ACCEPTED_DRIFT = [
+  // e.g. { instance: 'with-whatsapp-phone-code', path: 'user_settings.attributes.phone_number.channels',
+  //        reason: 'WhatsApp channel not yet provisioned on staging (CLERK-XXXX)' },
+];
+
+function isCriticalPath(path) {
+  return CRITICAL_PATHS.some(pattern => pattern.test(path));
+}
+
+function isAcceptedDrift(instanceName, path, acceptedDrift = ACCEPTED_DRIFT) {
+  return acceptedDrift.some(entry => {
+    if (entry.instance !== undefined && entry.instance !== instanceName) return false;
+    return typeof entry.path === 'string' ? entry.path === path : entry.path.test(path);
+  });
+}
+
+/**
+ * Split a pair's mismatches into blocking (critical and not accepted) and
+ * informational. Pure and side-effect free for testability.
+ */
+function classifyMismatches(instanceName, mismatches, acceptedDrift = ACCEPTED_DRIFT) {
+  const blocking = [];
+  const informational = [];
+  for (const m of mismatches) {
+    if (isCriticalPath(m.path) && !isAcceptedDrift(instanceName, m.path, acceptedDrift)) {
+      blocking.push(m);
+    } else {
+      informational.push(m);
+    }
+  }
+  return { blocking, informational };
+}
+
 // ── Key loading ──────────────────────────────────────────────────────────────
 
 function loadKeys(envVar, filePath) {
@@ -311,7 +372,7 @@ function printReport(name, mismatches) {
 
 // ── Main ─────────────────────────────────────────────────────────────────────
 
-async function main() {
+async function main({ strict = ['1', 'true'].includes(process.env.STAGING_VALIDATE_STRICT) } = {}) {
   const { keys: prodKeys, errors: prodErrors } = loadKeys('INTEGRATION_INSTANCE_KEYS', 'integration/.keys.json');
   for (const err of prodErrors) console.error(`⚠️  Production keys: ${err}`);
   if (!prodKeys) {
@@ -367,6 +428,8 @@ async function main() {
 
   let mismatchCount = 0;
   let fetchFailCount = 0;
+  let blockingTotal = 0;
+  const blockingByInstance = [];
 
   for (const pair of validPairs) {
     const prodDomain = parseFapiDomain(pair.prod.pk);
@@ -386,6 +449,12 @@ async function main() {
     mismatches = collapseAttributeMismatches(mismatches);
     mismatches = collapseSocialMismatches(mismatches);
 
+    const { blocking } = classifyMismatches(pair.name, mismatches);
+    if (blocking.length > 0) {
+      blockingTotal += blocking.length;
+      blockingByInstance.push({ name: pair.name, paths: blocking.map(m => m.path) });
+    }
+
     if (mismatches.length > 0) mismatchCount++;
     printReport(pair.name, mismatches);
   }
@@ -397,12 +466,32 @@ async function main() {
   const matchedCount = validPairs.length - mismatchCount - fetchFailCount;
   if (matchedCount > 0) parts.push(`${matchedCount} matched`);
   console.log(`Summary: ${parts.join(', ')} (${validPairs.length} total)`);
+
+  // Gating: only mismatches on critical config block, and only in strict mode.
+  // Fetch failures and cosmetic drift never fail the build, to avoid false reds.
+  if (blockingTotal > 0) {
+    console.log('');
+    console.log(
+      `❌ ${blockingTotal} blocking mismatch(es) on critical config across ${blockingByInstance.length} instance(s):`,
+    );
+    for (const { name, paths } of blockingByInstance) {
+      for (const p of paths) console.log(`   - ${name}: ${p}`);
+    }
+    if (strict) {
+      console.error(
+        '\nStaging instance config has drifted on critical paths. Fix the staging instance(s) or add an accepted-drift entry.',
+      );
+      process.exit(1);
+    }
+    console.log('\n(Report-only: set STAGING_VALIDATE_STRICT=1 or pass --strict to fail the build on the above.)');
+  }
 }
 
 // Allow importing functions for testing while still being executable
 const isDirectRun = process.argv[1] === fileURLToPath(import.meta.url);
 if (isDirectRun) {
-  main().catch(err => {
+  const strict = ['1', 'true'].includes(process.env.STAGING_VALIDATE_STRICT) || process.argv.includes('--strict');
+  main({ strict }).catch(err => {
     console.error('Unexpected error:', err);
     process.exit(0);
   });
@@ -416,5 +505,8 @@ export {
   collapseAttributeMismatches,
   collapseSocialMismatches,
   compareEnvironments,
+  isCriticalPath,
+  isAcceptedDrift,
+  classifyMismatches,
   main,
 };
diff --git a/scripts/validate-staging-instances.test.mjs b/scripts/validate-staging-instances.test.mjs
index 57c86a53801..0193c7fb533 100644
--- a/scripts/validate-staging-instances.test.mjs
+++ b/scripts/validate-staging-instances.test.mjs
@@ -7,10 +7,12 @@ vi.mock('node:fs', async importOriginal => {
 });
 
 import {
+  classifyMismatches,
   collapseAttributeMismatches,
   collapseSocialMismatches,
   diffObjects,
   fetchEnvironment,
+  isCriticalPath,
   loadKeys,
   main,
   parseFapiDomain,
@@ -317,6 +319,78 @@ describe('collapseSocialMismatches', () => {
   });
 });
 
+// ── isCriticalPath ──────────────────────────────────────────────────────────
+
+describe('isCriticalPath', () => {
+  it('flags attribute enabled toggles', () => {
+    expect(isCriticalPath('user_settings.attributes.phone_number.enabled')).toBe(true);
+    expect(isCriticalPath('user_settings.attributes.email_address.enabled')).toBe(true);
+  });
+
+  it('flags phone channel changes', () => {
+    expect(isCriticalPath('user_settings.attributes.phone_number.channels')).toBe(true);
+  });
+
+  it('flags factor / verification strategy changes', () => {
+    expect(isCriticalPath('user_settings.attributes.email_address.first_factors')).toBe(true);
+    expect(isCriticalPath('user_settings.attributes.phone_number.second_factors')).toBe(true);
+    expect(isCriticalPath('user_settings.attributes.email_address.verifications')).toBe(true);
+  });
+
+  it('flags social provider enable/disable and wholly added/removed', () => {
+    expect(isCriticalPath('user_settings.social.google')).toBe(true);
+    expect(isCriticalPath('user_settings.social.google.enabled')).toBe(true);
+  });
+
+  it('flags password settings', () => {
+    expect(isCriticalPath('user_settings.password_settings.min_length')).toBe(true);
+  });
+
+  it('does not flag cosmetic / non-critical paths', () => {
+    expect(isCriticalPath('auth_config.single_session_mode')).toBe(false);
+    expect(isCriticalPath('organization_settings.enabled')).toBe(false);
+    expect(isCriticalPath('user_settings.social.google.strategy')).toBe(false);
+    expect(isCriticalPath('user_settings.sign_in.second_factor.required')).toBe(false);
+  });
+});
+
+// ── classifyMismatches ──────────────────────────────────────────────────────
+
+describe('classifyMismatches', () => {
+  it('separates blocking (critical) from informational drift', () => {
+    const mismatches = [
+      { path: 'user_settings.attributes.email_address.enabled', prod: true, staging: false },
+      { path: 'auth_config.single_session_mode', prod: true, staging: false },
+    ];
+    const { blocking, informational } = classifyMismatches('myapp', mismatches);
+    expect(blocking.map(m => m.path)).toEqual(['user_settings.attributes.email_address.enabled']);
+    expect(informational.map(m => m.path)).toEqual(['auth_config.single_session_mode']);
+  });
+
+  it('respects accepted drift scoped to a specific instance', () => {
+    const mismatches = [
+      {
+        path: 'user_settings.attributes.phone_number.channels',
+        prod: ['sms', 'whatsapp'],
+        staging: ['sms'],
+        missingOnStaging: ['whatsapp'],
+      },
+    ];
+    const accepted = [
+      { instance: 'with-whatsapp-phone-code', path: 'user_settings.attributes.phone_number.channels', reason: 'x' },
+    ];
+    expect(classifyMismatches('with-whatsapp-phone-code', mismatches, accepted).blocking).toHaveLength(0);
+    // The same drift on a different instance is NOT accepted.
+    expect(classifyMismatches('other-instance', mismatches, accepted).blocking).toHaveLength(1);
+  });
+
+  it('accepts drift matched by a RegExp path with no instance scope', () => {
+    const mismatches = [{ path: 'user_settings.password_settings.min_length', prod: 8, staging: 6 }];
+    const accepted = [{ path: /^user_settings\.password_settings\./, reason: 'x' }];
+    expect(classifyMismatches('any', mismatches, accepted).blocking).toHaveLength(0);
+  });
+});
+
 // ── fetchEnvironment ────────────────────────────────────────────────────────
 
 describe('fetchEnvironment', () => {
@@ -387,6 +461,7 @@ describe('main', () => {
     // Clean up env vars
     delete process.env.INTEGRATION_INSTANCE_KEYS;
     delete process.env.INTEGRATION_STAGING_INSTANCE_KEYS;
+    delete process.env.STAGING_VALIDATE_STRICT;
   });
 
   afterEach(() => {
@@ -535,4 +610,94 @@ describe('main', () => {
     expect(consoleErrors.some(m => m.includes('bad_entry'))).toBe(true);
     expect(consoleLogs.some(m => m.includes('1 key load errors'))).toBe(true);
   });
+
+  // ── strict gating ──────────────────────────────────────────────────────────
+
+  function mockEnvPair(prodEnv, stagingEnv) {
+    let callCount = 0;
+    vi.spyOn(globalThis, 'fetch').mockImplementation(() => {
+      callCount++;
+      const env = callCount % 2 === 1 ? prodEnv : stagingEnv;
+      return Promise.resolve({ ok: true, json: () => Promise.resolve(env) });
+    });
+  }
+
+  const emptyUserSettings = () => ({ attributes: {}, social: {}, sign_in: {}, sign_up: {}, password_settings: {} });
+
+  function setPair() {
+    process.env.INTEGRATION_INSTANCE_KEYS = JSON.stringify({ myapp: { pk: PROD_PK } });
+    process.env.INTEGRATION_STAGING_INSTANCE_KEYS = JSON.stringify({ 'clerkstage-myapp': { pk: STAGING_PK } });
+  }
+
+  it('exits non-zero in strict mode when a critical config path drifts', async () => {
+    setPair();
+    mockEnvPair(
+      {
+        auth_config: {},
+        organization_settings: {},
+        user_settings: { ...emptyUserSettings(), attributes: { email_address: { enabled: true } } },
+      },
+      {
+        auth_config: {},
+        organization_settings: {},
+        user_settings: { ...emptyUserSettings(), attributes: { email_address: { enabled: false } } },
+      },
+    );
+
+    await expect(main({ strict: true })).rejects.toThrow('process.exit(1)');
+    expect(exitCode).toBe(1);
+    expect(consoleLogs.some(m => m.includes('blocking mismatch'))).toBe(true);
+  });
+
+  it('reports but does not exit non-zero on a critical mismatch when not strict', async () => {
+    setPair();
+    mockEnvPair(
+      {
+        auth_config: {},
+        organization_settings: {},
+        user_settings: { ...emptyUserSettings(), attributes: { email_address: { enabled: true } } },
+      },
+      {
+        auth_config: {},
+        organization_settings: {},
+        user_settings: { ...emptyUserSettings(), attributes: { email_address: { enabled: false } } },
+      },
+    );
+
+    await main({ strict: false });
+    expect(exitCode).toBeUndefined();
+    expect(consoleLogs.some(m => m.includes('Report-only'))).toBe(true);
+  });
+
+  it('does not block on non-critical drift even in strict mode', async () => {
+    setPair();
+    mockEnvPair(
+      { auth_config: { single_session_mode: true }, organization_settings: {}, user_settings: emptyUserSettings() },
+      { auth_config: { single_session_mode: false }, organization_settings: {}, user_settings: emptyUserSettings() },
+    );
+
+    await main({ strict: true });
+    expect(exitCode).toBeUndefined();
+    expect(consoleLogs.some(m => m.includes('1 mismatched'))).toBe(true);
+  });
+
+  it('defaults strict from STAGING_VALIDATE_STRICT=1 and blocks on critical drift', async () => {
+    process.env.STAGING_VALIDATE_STRICT = '1';
+    setPair();
+    mockEnvPair(
+      {
+        auth_config: {},
+        organization_settings: {},
+        user_settings: { ...emptyUserSettings(), attributes: { phone_number: { enabled: true } } },
+      },
+      {
+        auth_config: {},
+        organization_settings: {},
+        user_settings: { ...emptyUserSettings(), attributes: { phone_number: { enabled: false } } },
+      },
+    );
+
+    await expect(main()).rejects.toThrow('process.exit(1)');
+    expect(exitCode).toBe(1);
+  });
 });