From 7b59e11f79207a35117e77dd401f4ab7ed1485a2 Mon Sep 17 00:00:00 2001 From: Jacek Date: Thu, 4 Jun 2026 21:43:46 -0500 Subject: [PATCH 1/2] ci(e2e): harden staging e2e against deterministic and flaky reds The staging e2e "generic" leg was red on ~100% of runs because a few independent failures sat behind an all-or-nothing gate: - whatsapp-phone-code: the WhatsApp channel is not enabled on the staging instance, so the button never renders and the suite times out every run. It also bypasses the isStagingReady graceful-skip, so skip it explicitly on staging until the channel is provisioned. - custom-pages "survives a parent rerender": validates an unreleased @clerk/react fix (#8604), but the staging leg installs published @latest, so it is deterministically red until release. Skip when E2E_SDK_SOURCE=latest; PR CI (ref builds) still covers it. - concurrency was keyed on ref (effectively always "main") with cancel-in-progress, so each new staging deploy cancelled the in-flight run and no commit could report a status. Key on the clerk_go commit instead. - raise the job timeout above the 25-minute test step so the job cap no longer kills runs mid-suite. - emit and upload a JSON Playwright report in CI so the report job can classify failures (flaky vs failed, infra vs regression) later. --- .changeset/staging-e2e-resilience-p0.md | 2 ++ .github/workflows/e2e-staging.yml | 21 +++++++++++++++++-- integration/playwright.config.ts | 5 +++++ integration/tests/custom-pages.test.ts | 12 +++++++++++ integration/tests/whatsapp-phone-code.test.ts | 9 ++++++++ 5 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 .changeset/staging-e2e-resilience-p0.md diff --git a/.changeset/staging-e2e-resilience-p0.md b/.changeset/staging-e2e-resilience-p0.md new file mode 100644 index 00000000000..a845151cc84 --- /dev/null +++ b/.changeset/staging-e2e-resilience-p0.md @@ -0,0 +1,2 @@ +--- +--- diff --git a/.github/workflows/e2e-staging.yml b/.github/workflows/e2e-staging.yml index bb3d3772505..7ae9daca3c9 100644 --- a/.github/workflows/e2e-staging.yml +++ b/.github/workflows/e2e-staging.yml @@ -33,7 +33,11 @@ permissions: actions: write concurrency: - group: ${{ github.workflow }}-${{ github.event.inputs.ref || github.event.client_payload.ref || 'main' }} + # Key on the clerk_go commit being validated rather than the (effectively always "main") + # ref, so distinct staging deploys no longer cancel each other and each commit can report + # its own result. Duplicate dispatches for the SAME commit still de-dupe; manual dispatches + # without a commit SHA fall back to the unique run_id and are never cancelled. + group: ${{ github.workflow }}-${{ github.event.inputs.clerk-go-commit-sha || github.event.client_payload.clerk-go-commit-sha || github.run_id }} cancel-in-progress: true jobs: @@ -118,7 +122,9 @@ jobs: defaults: run: shell: bash - timeout-minutes: ${{ vars.TIMEOUT_MINUTES_LONG && fromJSON(vars.TIMEOUT_MINUTES_LONG) || 15 }} + # Must stay above the 25-minute "Run Integration Tests" step budget below, otherwise the + # job-level cap kills the run mid-suite and reports a misleading timeout/cancellation. + timeout-minutes: ${{ vars.TIMEOUT_MINUTES_LONG && fromJSON(vars.TIMEOUT_MINUTES_LONG) || 30 }} strategy: fail-fast: false @@ -293,6 +299,17 @@ jobs: path: test-results retention-days: 1 + # Always upload the machine-readable report (even on success) so downstream + # reporting/classification can run regardless of the leg's pass/fail outcome. + - name: Upload Playwright JSON report + if: ${{ !cancelled() }} + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: playwright-report-${{ github.run_id }}-${{ github.run_attempt }}-${{ steps.inputs.outputs.artifact-suffix }} + path: playwright-report/results.json + if-no-files-found: ignore + retention-days: 3 + report: name: Report Results needs: [integration-tests] diff --git a/integration/playwright.config.ts b/integration/playwright.config.ts index fbcd35fa2a9..911172a94a6 100644 --- a/integration/playwright.config.ts +++ b/integration/playwright.config.ts @@ -27,6 +27,11 @@ export const common: PlaywrightTestConfig = { export default defineConfig({ ...common, + // Emit a machine-readable report in CI so the staging workflow's report job can + // classify failures (flaky vs failed, infra vs regression) instead of reading a + // single pass/fail boolean. Local runs keep the default human-readable list output. + reporter: process.env.CI ? [['list'], ['json', { outputFile: 'playwright-report/results.json' }]] : 'list', + projects: [ { name: 'setup', diff --git a/integration/tests/custom-pages.test.ts b/integration/tests/custom-pages.test.ts index 0636e964c71..5f7304f615a 100644 --- a/integration/tests/custom-pages.test.ts +++ b/integration/tests/custom-pages.test.ts @@ -1,3 +1,4 @@ +/* eslint-disable turbo/no-undeclared-env-vars */ import { expect, test } from '@playwright/test'; import type { Application } from '../models/application'; @@ -172,6 +173,17 @@ testAgainstRunningApps({ withPattern: ['react.vite.withEmailCodes'] })( }); test('custom profile page survives a parent rerender without remounting', async ({ page, context }) => { + // Validates the @clerk/react fix from #8604 (custom pages must not remount on a + // parent rerender). The staging leg installs published @latest packages + // (E2E_SDK_SOURCE=latest), which do not yet contain the fix, so this is + // deterministically red there until the next @clerk/react release. PR CI builds + // the SDK from the branch and still exercises this test. + // TODO: remove this skip once @clerk/react including #8604 is published to npm. + test.skip( + process.env.E2E_SDK_SOURCE === 'latest', + 'validates the unreleased @clerk/react fix (#8604); covered by ref-built CI', + ); + const u = createTestUtils({ app, page, context }); await u.po.signIn.goTo(); await u.po.signIn.waitForMounted(); diff --git a/integration/tests/whatsapp-phone-code.test.ts b/integration/tests/whatsapp-phone-code.test.ts index 8f5c310f82f..10e240438ca 100644 --- a/integration/tests/whatsapp-phone-code.test.ts +++ b/integration/tests/whatsapp-phone-code.test.ts @@ -1,3 +1,4 @@ +/* eslint-disable turbo/no-undeclared-env-vars */ import { expect, test } from '@playwright/test'; import type { Application } from '../models/application'; @@ -6,6 +7,14 @@ import type { FakeUser } from '../testUtils'; import { createTestUtils } from '../testUtils'; test.describe('sign up and sign in with WhatsApp phone code @generic', () => { + // The WhatsApp alternate phone-code channel is not provisioned on the staging + // instance, so the WhatsApp sign-up button never renders there and every test in + // this suite times out deterministically (no amount of retrying helps). Unlike the + // long-running-app suites, this test builds its own app via `app.withEnv(...)` and + // therefore bypasses the `isStagingReady` graceful-skip. Skip it explicitly on + // staging until the channel is enabled on the staging mirror. + test.skip(process.env.E2E_STAGING === '1', 'WhatsApp channel is not enabled on the staging instance'); + const configs = [appConfigs.next.appRouter]; configs.forEach(config => { From 0eb5396dc9614ad3bf5fd6a1563a2f5aec93833b Mon Sep 17 00:00:00 2001 From: Jacek Date: Thu, 4 Jun 2026 21:59:57 -0500 Subject: [PATCH 2/2] ci(e2e): gate staging e2e on critical staging-instance config drift validate-staging-instances.mjs already diffs prod vs staging /v1/environment but every exit path returned 0, so detected drift blocked nothing and the job was not a dependency of the test matrix. A drifted staging mirror (e.g. a missing phone_number WhatsApp channel) therefore surfaced only as opaque test timeouts 200 tests deep. Add a tight CRITICAL_PATHS allowlist (attribute enabled toggles, phone_number.channels, auth factors/strategies, social enable/disable, password settings) and an ACCEPTED_DRIFT escape hatch so known gaps don't block while new drift does. In strict mode the script exits non-zero on a blocking mismatch; fetch failures and cosmetic drift never fail the build. Wire integration-tests to need validate-instances, and drive strictness from the STAGING_VALIDATE_STRICT repo variable (default report-only). So this is a no-op until the team opts in: it logs blocking drift and the proposed gate without failing anything. Flip the variable to make it enforce. --- .changeset/staging-e2e-validate-gate.md | 2 + .github/workflows/e2e-staging.yml | 12 +- scripts/validate-staging-instances.mjs | 96 +++++++++++- scripts/validate-staging-instances.test.mjs | 165 ++++++++++++++++++++ 4 files changed, 271 insertions(+), 4 deletions(-) create mode 100644 .changeset/staging-e2e-validate-gate.md diff --git a/.changeset/staging-e2e-validate-gate.md b/.changeset/staging-e2e-validate-gate.md new file mode 100644 index 00000000000..a845151cc84 --- /dev/null +++ b/.changeset/staging-e2e-validate-gate.md @@ -0,0 +1,2 @@ +--- +--- diff --git a/.github/workflows/e2e-staging.yml b/.github/workflows/e2e-staging.yml index 7ae9daca3c9..22a7863c31c 100644 --- a/.github/workflows/e2e-staging.yml +++ b/.github/workflows/e2e-staging.yml @@ -111,13 +111,21 @@ jobs: - name: Validate staging instance settings run: node scripts/validate-staging-instances.mjs env: + # Report-only unless the `STAGING_VALIDATE_STRICT` repo variable is set to "true"/"1". + # When strict, a mismatch on critical config (see CRITICAL_PATHS in the script) fails + # this job, which gates the integration-tests job below so the run stops fast with a + # clear diagnostic instead of letting a drifted staging mirror produce opaque failures. + STAGING_VALIDATE_STRICT: ${{ vars.STAGING_VALIDATE_STRICT }} INTEGRATION_INSTANCE_KEYS: ${{ secrets.INTEGRATION_INSTANCE_KEYS }} INTEGRATION_STAGING_INSTANCE_KEYS: ${{ secrets.INTEGRATION_STAGING_INSTANCE_KEYS }} integration-tests: name: Integration Tests (${{ matrix.test-name }}, ${{ matrix.test-project }}) - needs: [permissions-check] - if: ${{ always() && (needs.permissions-check.result == 'success' || needs.permissions-check.result == 'skipped') }} + needs: [permissions-check, validate-instances] + # Run when permissions passed/skipped AND the staging-config validation did not block. + # validate-instances only fails when strict gating is enabled and critical config drifted, + # so by default (report-only) this is a no-op and tests run as before. + if: ${{ always() && (needs.permissions-check.result == 'success' || needs.permissions-check.result == 'skipped') && (needs.validate-instances.result == 'success' || needs.validate-instances.result == 'skipped') }} runs-on: 'blacksmith-8vcpu-ubuntu-2204' defaults: run: diff --git a/scripts/validate-staging-instances.mjs b/scripts/validate-staging-instances.mjs index 5afb0178617..25a519c349b 100644 --- a/scripts/validate-staging-instances.mjs +++ b/scripts/validate-staging-instances.mjs @@ -35,6 +35,67 @@ function isIgnored(path) { return IGNORED_PATHS.some(pattern => pattern.test(path)); } +// ── Gating policy ──────────────────────────────────────────────────────────── + +/** + * Functional configuration that must match between a production instance and its + * staging mirror for the e2e suite to be meaningful. A mismatch on any of these + * paths fails the gate in strict mode; every other difference is reported as + * informational drift and never blocks. Keep this list tight: only config that + * actually changes which auth flows are possible belongs here. + */ +const CRITICAL_PATHS = [ + // An auth attribute (email_address, phone_number, username, ...) toggled on/off. + /^user_settings\.attributes\.[^.]+\.enabled$/, + // The phone-code channel set (sms / whatsapp), which drives alternate-channel UIs. + /^user_settings\.attributes\.phone_number\.channels$/, + // Enabled auth strategies / factors for an attribute. + /^user_settings\.attributes\.[^.]+\.(first_factors|second_factors|verifications)$/, + // A social provider enabled/disabled, or wholly added/removed. + /^user_settings\.social\.[^.]+(\.enabled)?$/, + // Password policy, which affects password sign-in / sign-up flows. + /^user_settings\.password_settings\..+/, +]; + +/** + * Known, intentionally-tolerated critical drift that should NOT fail the gate, so + * that NEW drift still does. Each entry needs a `path` (string or RegExp), an + * optional `instance` name to scope it, and a `reason` (ideally a tracking link). + * Prefer fixing the staging instance over adding entries here. + */ +const ACCEPTED_DRIFT = [ + // e.g. { instance: 'with-whatsapp-phone-code', path: 'user_settings.attributes.phone_number.channels', + // reason: 'WhatsApp channel not yet provisioned on staging (CLERK-XXXX)' }, +]; + +function isCriticalPath(path) { + return CRITICAL_PATHS.some(pattern => pattern.test(path)); +} + +function isAcceptedDrift(instanceName, path, acceptedDrift = ACCEPTED_DRIFT) { + return acceptedDrift.some(entry => { + if (entry.instance !== undefined && entry.instance !== instanceName) return false; + return typeof entry.path === 'string' ? entry.path === path : entry.path.test(path); + }); +} + +/** + * Split a pair's mismatches into blocking (critical and not accepted) and + * informational. Pure and side-effect free for testability. + */ +function classifyMismatches(instanceName, mismatches, acceptedDrift = ACCEPTED_DRIFT) { + const blocking = []; + const informational = []; + for (const m of mismatches) { + if (isCriticalPath(m.path) && !isAcceptedDrift(instanceName, m.path, acceptedDrift)) { + blocking.push(m); + } else { + informational.push(m); + } + } + return { blocking, informational }; +} + // ── Key loading ────────────────────────────────────────────────────────────── function loadKeys(envVar, filePath) { @@ -311,7 +372,7 @@ function printReport(name, mismatches) { // ── Main ───────────────────────────────────────────────────────────────────── -async function main() { +async function main({ strict = ['1', 'true'].includes(process.env.STAGING_VALIDATE_STRICT) } = {}) { const { keys: prodKeys, errors: prodErrors } = loadKeys('INTEGRATION_INSTANCE_KEYS', 'integration/.keys.json'); for (const err of prodErrors) console.error(`⚠️ Production keys: ${err}`); if (!prodKeys) { @@ -367,6 +428,8 @@ async function main() { let mismatchCount = 0; let fetchFailCount = 0; + let blockingTotal = 0; + const blockingByInstance = []; for (const pair of validPairs) { const prodDomain = parseFapiDomain(pair.prod.pk); @@ -386,6 +449,12 @@ async function main() { mismatches = collapseAttributeMismatches(mismatches); mismatches = collapseSocialMismatches(mismatches); + const { blocking } = classifyMismatches(pair.name, mismatches); + if (blocking.length > 0) { + blockingTotal += blocking.length; + blockingByInstance.push({ name: pair.name, paths: blocking.map(m => m.path) }); + } + if (mismatches.length > 0) mismatchCount++; printReport(pair.name, mismatches); } @@ -397,12 +466,32 @@ async function main() { const matchedCount = validPairs.length - mismatchCount - fetchFailCount; if (matchedCount > 0) parts.push(`${matchedCount} matched`); console.log(`Summary: ${parts.join(', ')} (${validPairs.length} total)`); + + // Gating: only mismatches on critical config block, and only in strict mode. + // Fetch failures and cosmetic drift never fail the build, to avoid false reds. + if (blockingTotal > 0) { + console.log(''); + console.log( + `❌ ${blockingTotal} blocking mismatch(es) on critical config across ${blockingByInstance.length} instance(s):`, + ); + for (const { name, paths } of blockingByInstance) { + for (const p of paths) console.log(` - ${name}: ${p}`); + } + if (strict) { + console.error( + '\nStaging instance config has drifted on critical paths. Fix the staging instance(s) or add an accepted-drift entry.', + ); + process.exit(1); + } + console.log('\n(Report-only: set STAGING_VALIDATE_STRICT=1 or pass --strict to fail the build on the above.)'); + } } // Allow importing functions for testing while still being executable const isDirectRun = process.argv[1] === fileURLToPath(import.meta.url); if (isDirectRun) { - main().catch(err => { + const strict = ['1', 'true'].includes(process.env.STAGING_VALIDATE_STRICT) || process.argv.includes('--strict'); + main({ strict }).catch(err => { console.error('Unexpected error:', err); process.exit(0); }); @@ -416,5 +505,8 @@ export { collapseAttributeMismatches, collapseSocialMismatches, compareEnvironments, + isCriticalPath, + isAcceptedDrift, + classifyMismatches, main, }; diff --git a/scripts/validate-staging-instances.test.mjs b/scripts/validate-staging-instances.test.mjs index 57c86a53801..0193c7fb533 100644 --- a/scripts/validate-staging-instances.test.mjs +++ b/scripts/validate-staging-instances.test.mjs @@ -7,10 +7,12 @@ vi.mock('node:fs', async importOriginal => { }); import { + classifyMismatches, collapseAttributeMismatches, collapseSocialMismatches, diffObjects, fetchEnvironment, + isCriticalPath, loadKeys, main, parseFapiDomain, @@ -317,6 +319,78 @@ describe('collapseSocialMismatches', () => { }); }); +// ── isCriticalPath ────────────────────────────────────────────────────────── + +describe('isCriticalPath', () => { + it('flags attribute enabled toggles', () => { + expect(isCriticalPath('user_settings.attributes.phone_number.enabled')).toBe(true); + expect(isCriticalPath('user_settings.attributes.email_address.enabled')).toBe(true); + }); + + it('flags phone channel changes', () => { + expect(isCriticalPath('user_settings.attributes.phone_number.channels')).toBe(true); + }); + + it('flags factor / verification strategy changes', () => { + expect(isCriticalPath('user_settings.attributes.email_address.first_factors')).toBe(true); + expect(isCriticalPath('user_settings.attributes.phone_number.second_factors')).toBe(true); + expect(isCriticalPath('user_settings.attributes.email_address.verifications')).toBe(true); + }); + + it('flags social provider enable/disable and wholly added/removed', () => { + expect(isCriticalPath('user_settings.social.google')).toBe(true); + expect(isCriticalPath('user_settings.social.google.enabled')).toBe(true); + }); + + it('flags password settings', () => { + expect(isCriticalPath('user_settings.password_settings.min_length')).toBe(true); + }); + + it('does not flag cosmetic / non-critical paths', () => { + expect(isCriticalPath('auth_config.single_session_mode')).toBe(false); + expect(isCriticalPath('organization_settings.enabled')).toBe(false); + expect(isCriticalPath('user_settings.social.google.strategy')).toBe(false); + expect(isCriticalPath('user_settings.sign_in.second_factor.required')).toBe(false); + }); +}); + +// ── classifyMismatches ────────────────────────────────────────────────────── + +describe('classifyMismatches', () => { + it('separates blocking (critical) from informational drift', () => { + const mismatches = [ + { path: 'user_settings.attributes.email_address.enabled', prod: true, staging: false }, + { path: 'auth_config.single_session_mode', prod: true, staging: false }, + ]; + const { blocking, informational } = classifyMismatches('myapp', mismatches); + expect(blocking.map(m => m.path)).toEqual(['user_settings.attributes.email_address.enabled']); + expect(informational.map(m => m.path)).toEqual(['auth_config.single_session_mode']); + }); + + it('respects accepted drift scoped to a specific instance', () => { + const mismatches = [ + { + path: 'user_settings.attributes.phone_number.channels', + prod: ['sms', 'whatsapp'], + staging: ['sms'], + missingOnStaging: ['whatsapp'], + }, + ]; + const accepted = [ + { instance: 'with-whatsapp-phone-code', path: 'user_settings.attributes.phone_number.channels', reason: 'x' }, + ]; + expect(classifyMismatches('with-whatsapp-phone-code', mismatches, accepted).blocking).toHaveLength(0); + // The same drift on a different instance is NOT accepted. + expect(classifyMismatches('other-instance', mismatches, accepted).blocking).toHaveLength(1); + }); + + it('accepts drift matched by a RegExp path with no instance scope', () => { + const mismatches = [{ path: 'user_settings.password_settings.min_length', prod: 8, staging: 6 }]; + const accepted = [{ path: /^user_settings\.password_settings\./, reason: 'x' }]; + expect(classifyMismatches('any', mismatches, accepted).blocking).toHaveLength(0); + }); +}); + // ── fetchEnvironment ──────────────────────────────────────────────────────── describe('fetchEnvironment', () => { @@ -387,6 +461,7 @@ describe('main', () => { // Clean up env vars delete process.env.INTEGRATION_INSTANCE_KEYS; delete process.env.INTEGRATION_STAGING_INSTANCE_KEYS; + delete process.env.STAGING_VALIDATE_STRICT; }); afterEach(() => { @@ -535,4 +610,94 @@ describe('main', () => { expect(consoleErrors.some(m => m.includes('bad_entry'))).toBe(true); expect(consoleLogs.some(m => m.includes('1 key load errors'))).toBe(true); }); + + // ── strict gating ────────────────────────────────────────────────────────── + + function mockEnvPair(prodEnv, stagingEnv) { + let callCount = 0; + vi.spyOn(globalThis, 'fetch').mockImplementation(() => { + callCount++; + const env = callCount % 2 === 1 ? prodEnv : stagingEnv; + return Promise.resolve({ ok: true, json: () => Promise.resolve(env) }); + }); + } + + const emptyUserSettings = () => ({ attributes: {}, social: {}, sign_in: {}, sign_up: {}, password_settings: {} }); + + function setPair() { + process.env.INTEGRATION_INSTANCE_KEYS = JSON.stringify({ myapp: { pk: PROD_PK } }); + process.env.INTEGRATION_STAGING_INSTANCE_KEYS = JSON.stringify({ 'clerkstage-myapp': { pk: STAGING_PK } }); + } + + it('exits non-zero in strict mode when a critical config path drifts', async () => { + setPair(); + mockEnvPair( + { + auth_config: {}, + organization_settings: {}, + user_settings: { ...emptyUserSettings(), attributes: { email_address: { enabled: true } } }, + }, + { + auth_config: {}, + organization_settings: {}, + user_settings: { ...emptyUserSettings(), attributes: { email_address: { enabled: false } } }, + }, + ); + + await expect(main({ strict: true })).rejects.toThrow('process.exit(1)'); + expect(exitCode).toBe(1); + expect(consoleLogs.some(m => m.includes('blocking mismatch'))).toBe(true); + }); + + it('reports but does not exit non-zero on a critical mismatch when not strict', async () => { + setPair(); + mockEnvPair( + { + auth_config: {}, + organization_settings: {}, + user_settings: { ...emptyUserSettings(), attributes: { email_address: { enabled: true } } }, + }, + { + auth_config: {}, + organization_settings: {}, + user_settings: { ...emptyUserSettings(), attributes: { email_address: { enabled: false } } }, + }, + ); + + await main({ strict: false }); + expect(exitCode).toBeUndefined(); + expect(consoleLogs.some(m => m.includes('Report-only'))).toBe(true); + }); + + it('does not block on non-critical drift even in strict mode', async () => { + setPair(); + mockEnvPair( + { auth_config: { single_session_mode: true }, organization_settings: {}, user_settings: emptyUserSettings() }, + { auth_config: { single_session_mode: false }, organization_settings: {}, user_settings: emptyUserSettings() }, + ); + + await main({ strict: true }); + expect(exitCode).toBeUndefined(); + expect(consoleLogs.some(m => m.includes('1 mismatched'))).toBe(true); + }); + + it('defaults strict from STAGING_VALIDATE_STRICT=1 and blocks on critical drift', async () => { + process.env.STAGING_VALIDATE_STRICT = '1'; + setPair(); + mockEnvPair( + { + auth_config: {}, + organization_settings: {}, + user_settings: { ...emptyUserSettings(), attributes: { phone_number: { enabled: true } } }, + }, + { + auth_config: {}, + organization_settings: {}, + user_settings: { ...emptyUserSettings(), attributes: { phone_number: { enabled: false } } }, + }, + ); + + await expect(main()).rejects.toThrow('process.exit(1)'); + expect(exitCode).toBe(1); + }); });