clerk · jacekradko · Jun 5, 2026 · Jun 5, 2026
diff --git a/.changeset/staging-e2e-resilience-p0.md b/.changeset/staging-e2e-resilience-p0.md
@@ -0,0 +1,2 @@
+---
+---
diff --git a/.changeset/staging-e2e-validate-gate.md b/.changeset/staging-e2e-validate-gate.md
@@ -0,0 +1,2 @@
+---
+---
diff --git a/.github/workflows/e2e-staging.yml b/.github/workflows/e2e-staging.yml
@@ -33,7 +33,11 @@ permissions:
   actions: write
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.inputs.ref || github.event.client_payload.ref || 'main' }}
+  # Key on the clerk_go commit being validated rather than the (effectively always "main")
+  # ref, so distinct staging deploys no longer cancel each other and each commit can report
+  # its own result. Duplicate dispatches for the SAME commit still de-dupe; manual dispatches
+  # without a commit SHA fall back to the unique run_id and are never cancelled.
+  group: ${{ github.workflow }}-${{ github.event.inputs.clerk-go-commit-sha || github.event.client_payload.clerk-go-commit-sha || github.run_id }}
   cancel-in-progress: true
 
 jobs:
@@ -107,18 +111,28 @@ jobs:
       - name: Validate staging instance settings
         run: node scripts/validate-staging-instances.mjs
         env:
+          # Report-only unless the `STAGING_VALIDATE_STRICT` repo variable is set to "true"/"1".
+          # When strict, a mismatch on critical config (see CRITICAL_PATHS in the script) fails
+          # this job, which gates the integration-tests job below so the run stops fast with a
+          # clear diagnostic instead of letting a drifted staging mirror produce opaque failures.
+          STAGING_VALIDATE_STRICT: ${{ vars.STAGING_VALIDATE_STRICT }}
           INTEGRATION_INSTANCE_KEYS: ${{ secrets.INTEGRATION_INSTANCE_KEYS }}
           INTEGRATION_STAGING_INSTANCE_KEYS: ${{ secrets.INTEGRATION_STAGING_INSTANCE_KEYS }}
 
   integration-tests:
     name: Integration Tests (${{ matrix.test-name }}, ${{ matrix.test-project }})
-    needs: [permissions-check]
-    if: ${{ always() && (needs.permissions-check.result == 'success' || needs.permissions-check.result == 'skipped') }}
+    needs: [permissions-check, validate-instances]
+    # Run when permissions passed/skipped AND the staging-config validation did not block.
+    # validate-instances only fails when strict gating is enabled and critical config drifted,
+    # so by default (report-only) this is a no-op and tests run as before.
+    if: ${{ always() && (needs.permissions-check.result == 'success' || needs.permissions-check.result == 'skipped') && (needs.validate-instances.result == 'success' || needs.validate-instances.result == 'skipped') }}
     runs-on: 'blacksmith-8vcpu-ubuntu-2204'
     defaults:
       run:
         shell: bash
-    timeout-minutes: ${{ vars.TIMEOUT_MINUTES_LONG && fromJSON(vars.TIMEOUT_MINUTES_LONG) || 15 }}
+    # Must stay above the 25-minute "Run Integration Tests" step budget below, otherwise the
+    # job-level cap kills the run mid-suite and reports a misleading timeout/cancellation.
+    timeout-minutes: ${{ vars.TIMEOUT_MINUTES_LONG && fromJSON(vars.TIMEOUT_MINUTES_LONG) || 30 }}
 
     strategy:
       fail-fast: false
@@ -293,6 +307,17 @@ jobs:
           path: test-results
           retention-days: 1
 
+      # Always upload the machine-readable report (even on success) so downstream
+      # reporting/classification can run regardless of the leg's pass/fail outcome.
+      - name: Upload Playwright JSON report
+        if: ${{ !cancelled() }}
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        with:
+          name: playwright-report-${{ github.run_id }}-${{ github.run_attempt }}-${{ steps.inputs.outputs.artifact-suffix }}
+          path: playwright-report/results.json
+          if-no-files-found: ignore
+          retention-days: 3
+
   report:
     name: Report Results
     needs: [integration-tests]

diff --git a/integration/playwright.config.ts b/integration/playwright.config.ts
@@ -27,6 +27,11 @@ export const common: PlaywrightTestConfig = {
 export default defineConfig({
   ...common,
 
+  // Emit a machine-readable report in CI so the staging workflow's report job can
+  // classify failures (flaky vs failed, infra vs regression) instead of reading a
+  // single pass/fail boolean. Local runs keep the default human-readable list output.
+  reporter: process.env.CI ? [['list'], ['json', { outputFile: 'playwright-report/results.json' }]] : 'list',
+
   projects: [
     {
       name: 'setup',

diff --git a/integration/tests/custom-pages.test.ts b/integration/tests/custom-pages.test.ts
@@ -1,3 +1,4 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
 import { expect, test } from '@playwright/test';
 
 import type { Application } from '../models/application';
@@ -172,6 +173,17 @@ testAgainstRunningApps({ withPattern: ['react.vite.withEmailCodes'] })(
     });
 
     test('custom profile page survives a parent rerender without remounting', async ({ page, context }) => {
+      // Validates the @clerk/react fix from #8604 (custom pages must not remount on a
+      // parent rerender). The staging leg installs published @latest packages
+      // (E2E_SDK_SOURCE=latest), which do not yet contain the fix, so this is
+      // deterministically red there until the next @clerk/react release. PR CI builds
+      // the SDK from the branch and still exercises this test.
+      // TODO: remove this skip once @clerk/react including #8604 is published to npm.
+      test.skip(
+        process.env.E2E_SDK_SOURCE === 'latest',
+        'validates the unreleased @clerk/react fix (#8604); covered by ref-built CI',
+      );
+
       const u = createTestUtils({ app, page, context });
       await u.po.signIn.goTo();
       await u.po.signIn.waitForMounted();

diff --git a/integration/tests/whatsapp-phone-code.test.ts b/integration/tests/whatsapp-phone-code.test.ts
@@ -1,3 +1,4 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
 import { expect, test } from '@playwright/test';
 
 import type { Application } from '../models/application';
@@ -6,6 +7,14 @@ import type { FakeUser } from '../testUtils';
 import { createTestUtils } from '../testUtils';
 
 test.describe('sign up and sign in with WhatsApp phone code @generic', () => {
+  // The WhatsApp alternate phone-code channel is not provisioned on the staging
+  // instance, so the WhatsApp sign-up button never renders there and every test in
+  // this suite times out deterministically (no amount of retrying helps). Unlike the
+  // long-running-app suites, this test builds its own app via `app.withEnv(...)` and
+  // therefore bypasses the `isStagingReady` graceful-skip. Skip it explicitly on
+  // staging until the channel is enabled on the staging mirror.
+  test.skip(process.env.E2E_STAGING === '1', 'WhatsApp channel is not enabled on the staging instance');
+
   const configs = [appConfigs.next.appRouter];
 
   configs.forEach(config => {

diff --git a/scripts/validate-staging-instances.mjs b/scripts/validate-staging-instances.mjs
@@ -35,6 +35,67 @@ function isIgnored(path) {
   return IGNORED_PATHS.some(pattern => pattern.test(path));
 }
 
+// ── Gating policy ────────────────────────────────────────────────────────────
+
+/**
+ * Functional configuration that must match between a production instance and its
+ * staging mirror for the e2e suite to be meaningful. A mismatch on any of these
+ * paths fails the gate in strict mode; every other difference is reported as
+ * informational drift and never blocks. Keep this list tight: only config that
+ * actually changes which auth flows are possible belongs here.
+ */
+const CRITICAL_PATHS = [
+  // An auth attribute (email_address, phone_number, username, ...) toggled on/off.
+  /^user_settings\.attributes\.[^.]+\.enabled$/,
+  // The phone-code channel set (sms / whatsapp), which drives alternate-channel UIs.
+  /^user_settings\.attributes\.phone_number\.channels$/,
+  // Enabled auth strategies / factors for an attribute.
+  /^user_settings\.attributes\.[^.]+\.(first_factors|second_factors|verifications)$/,
+  // A social provider enabled/disabled, or wholly added/removed.
+  /^user_settings\.social\.[^.]+(\.enabled)?$/,
+  // Password policy, which affects password sign-in / sign-up flows.
+  /^user_settings\.password_settings\..+/,
+];
+
+/**
+ * Known, intentionally-tolerated critical drift that should NOT fail the gate, so
+ * that NEW drift still does. Each entry needs a `path` (string or RegExp), an
+ * optional `instance` name to scope it, and a `reason` (ideally a tracking link).
+ * Prefer fixing the staging instance over adding entries here.
+ */
+const ACCEPTED_DRIFT = [
+  // e.g. { instance: 'with-whatsapp-phone-code', path: 'user_settings.attributes.phone_number.channels',
+  //        reason: 'WhatsApp channel not yet provisioned on staging (CLERK-XXXX)' },
+];
+
+function isCriticalPath(path) {
+  return CRITICAL_PATHS.some(pattern => pattern.test(path));
+}
+
+function isAcceptedDrift(instanceName, path, acceptedDrift = ACCEPTED_DRIFT) {
+  return acceptedDrift.some(entry => {
+    if (entry.instance !== undefined && entry.instance !== instanceName) return false;
+    return typeof entry.path === 'string' ? entry.path === path : entry.path.test(path);
+  });
+}
+
+/**
+ * Split a pair's mismatches into blocking (critical and not accepted) and
+ * informational. Pure and side-effect free for testability.
+ */
+function classifyMismatches(instanceName, mismatches, acceptedDrift = ACCEPTED_DRIFT) {
+  const blocking = [];
+  const informational = [];
+  for (const m of mismatches) {
+    if (isCriticalPath(m.path) && !isAcceptedDrift(instanceName, m.path, acceptedDrift)) {
+      blocking.push(m);
+    } else {
+      informational.push(m);
+    }
+  }
+  return { blocking, informational };
+}
+
 // ── Key loading ──────────────────────────────────────────────────────────────
 
 function loadKeys(envVar, filePath) {
@@ -311,7 +372,7 @@ function printReport(name, mismatches) {
 
 // ── Main ─────────────────────────────────────────────────────────────────────
 
-async function main() {
+async function main({ strict = ['1', 'true'].includes(process.env.STAGING_VALIDATE_STRICT) } = {}) {
   const { keys: prodKeys, errors: prodErrors } = loadKeys('INTEGRATION_INSTANCE_KEYS', 'integration/.keys.json');
   for (const err of prodErrors) console.error(`⚠️  Production keys: ${err}`);
   if (!prodKeys) {
@@ -367,6 +428,8 @@ async function main() {
 
   let mismatchCount = 0;
   let fetchFailCount = 0;
+  let blockingTotal = 0;
+  const blockingByInstance = [];
 
   for (const pair of validPairs) {
     const prodDomain = parseFapiDomain(pair.prod.pk);
@@ -386,6 +449,12 @@ async function main() {
     mismatches = collapseAttributeMismatches(mismatches);
     mismatches = collapseSocialMismatches(mismatches);
 
+    const { blocking } = classifyMismatches(pair.name, mismatches);
+    if (blocking.length > 0) {
+      blockingTotal += blocking.length;
+      blockingByInstance.push({ name: pair.name, paths: blocking.map(m => m.path) });
+    }
+
     if (mismatches.length > 0) mismatchCount++;
     printReport(pair.name, mismatches);
   }
@@ -397,12 +466,32 @@ async function main() {
   const matchedCount = validPairs.length - mismatchCount - fetchFailCount;
   if (matchedCount > 0) parts.push(`${matchedCount} matched`);
   console.log(`Summary: ${parts.join(', ')} (${validPairs.length} total)`);
+
+  // Gating: only mismatches on critical config block, and only in strict mode.
+  // Fetch failures and cosmetic drift never fail the build, to avoid false reds.
+  if (blockingTotal > 0) {
+    console.log('');
+    console.log(
+      `❌ ${blockingTotal} blocking mismatch(es) on critical config across ${blockingByInstance.length} instance(s):`,
+    );
+    for (const { name, paths } of blockingByInstance) {
+      for (const p of paths) console.log(`   - ${name}: ${p}`);
+    }
+    if (strict) {
+      console.error(
+        '\nStaging instance config has drifted on critical paths. Fix the staging instance(s) or add an accepted-drift entry.',
+      );
+      process.exit(1);
+    }
+    console.log('\n(Report-only: set STAGING_VALIDATE_STRICT=1 or pass --strict to fail the build on the above.)');
+  }
 }
 
 // Allow importing functions for testing while still being executable
 const isDirectRun = process.argv[1] === fileURLToPath(import.meta.url);
 if (isDirectRun) {
-  main().catch(err => {
+  const strict = ['1', 'true'].includes(process.env.STAGING_VALIDATE_STRICT) || process.argv.includes('--strict');
+  main({ strict }).catch(err => {
     console.error('Unexpected error:', err);
     process.exit(0);
   });
@@ -416,5 +505,8 @@ export {
   collapseAttributeMismatches,
   collapseSocialMismatches,
   compareEnvironments,
+  isCriticalPath,
+  isAcceptedDrift,
+  classifyMismatches,
   main,
 };