/**
 * Split a markdown string with `---`-delimited YAML frontmatter into its raw
 * YAML text (`data`) and the markdown body (`content`).
 *
 * Rules:
 * 1. A leading UTF-8 BOM is dropped. The opening delimiter must sit on the
 *    first line; only spaces/tabs may precede it.
 * 2. A delimiter line is exactly three dashes, optionally followed by
 *    spaces/tabs, then the line ending. `----` (4+ dashes) and `---text` are
 *    NOT delimiters, so no text on a delimiter line is ever silently dropped.
 * 3. The closing delimiter must start at column 0 of a line after the opening
 *    line (i.e. it is always preceded by a `\n`).
 * 4. `content` begins right after the closing line's newline and is `''` when
 *    the closing delimiter is the last line of the input.
 * 5. Both `\n` and `\r\n` line endings are accepted; the line terminator that
 *    precedes the closing delimiter is never part of `data`.
 *
 * @param markdown - Full document text, possibly starting with a BOM.
 * @returns `{ data, content }` when a valid frontmatter block is found, or
 *          `null` when the input has no valid frontmatter block.
 */
export function splitFrontmatter(
  markdown: string,
): { data: string; content: string } | null {
  // Strip optional UTF-8 BOM so the delimiter check sees the real first line.
  const input = markdown.replace(/^\uFEFF/, '');

  // Opening line: optional indent, exactly `---`, optional trailing blanks,
  // then a mandatory newline (a lone `---` without newline is not frontmatter).
  const opening = /^[ \t]*---[ \t]*\r?\n/.exec(input);
  if (opening === null) return null;
  const dataStart = opening[0].length;

  // Sticky regex: matches only at `lastIndex`, so we can test "does a closing
  // delimiter line start exactly here?" without slicing the input.
  const closingLine = /---[ \t]*\r?(?:\n|$)/y;

  // Visit the start of every line after the opening delimiter.
  let lineStart = dataStart;
  while (lineStart <= input.length) {
    closingLine.lastIndex = lineStart;
    if (closingLine.test(input)) {
      // Data ends before the `\n` that precedes the closing line; when the
      // closing line directly follows the opening line the data is empty.
      const dataEnd = Math.max(dataStart, lineStart - 1);
      const raw = input.slice(dataStart, dataEnd);
      // For CRLF input the terminator is `\r\n`; drop the leftover `\r`.
      const data = raw.endsWith('\r') ? raw.slice(0, -1) : raw;
      // After a successful sticky match, `lastIndex` points just past the
      // closing line (including its newline, if any).
      return { data, content: input.slice(closingLine.lastIndex) };
    }

    const newline = input.indexOf('\n', lineStart);
    if (newline === -1) break;
    lineStart = newline + 1;
  }

  // No valid closing delimiter found.
  return null;
}
content, dashes in content body (shouldn't be treated as delimiters), 4+ dashes ignored +- [x] `splitFrontmatter(markdown: string): { data: string; content: string } | null` +- [x] Opening `---` must be at the start of the file (or after optional BOM/whitespace on first line) +- [x] `----` (4+ dashes) is NOT a valid delimiter — only exact `---` +- [x] Closing delimiter requires `\n---` (newline before dashes) +- [x] Returns `null` if no valid frontmatter found +- [x] Returns `{ data: "", content: "" }` if frontmatter is present but empty (e.g., `---\n---`) +- [x] Content body starts after the closing `---` + newline +- [x] Handles edge cases: no closing delimiter (returns null), file with only `---\n---`, file with no `---` at all +- [x] Unit tests: standard frontmatter, no frontmatter, empty frontmatter, multi-line content, dashes in content body (shouldn't be treated as delimiters), 4+ dashes ignored ## References @@ -38,8 +38,13 @@ Per [frontmatter.md](../../../docs/architecture/frontmatter.md), the splitter: ## Notes -> To be filled by implementation agent +Self-contained `splitFrontmatter` function implemented with no external dependencies. Uses regex for opening delimiter match and manual scan for closing delimiter to enforce exact 3-dash rule. Handles BOM stripping and empty frontmatter. ## Summary -> To be filled on completion \ No newline at end of file +Implemented the `splitFrontmatter` function in `src/frontmatter/parse.ts` per architecture spec. 
describe('splitFrontmatter', () => {
  /** Build a multi-line fixture; rows are joined with a single `\n`. */
  const joinLines = (...rows: string[]): string => rows.join('\n');

  /** Assert that `source` splits into exactly the given data and content. */
  const expectParts = (source: string, data: string, content: string): void => {
    const parts = splitFrontmatter(source);
    expect(parts).not.toBeNull();
    expect(parts?.data).toBe(data);
    expect(parts?.content).toBe(content);
  };

  /** Assert that `source` is not recognised as having frontmatter. */
  const expectNoFrontmatter = (source: string): void => {
    expect(splitFrontmatter(source)).toBeNull();
  };

  // --- Standard frontmatter ---

  it('extracts YAML data and markdown content from standard frontmatter', () => {
    const source = joinLines(
      '---',
      'title: Hello',
      'status: pending',
      '---',
      '# Heading',
      '',
      'Some content here.',
      '',
    );
    expectParts(
      source,
      'title: Hello\nstatus: pending',
      '# Heading\n\nSome content here.\n',
    );
  });

  it('handles multi-line YAML data', () => {
    const source = joinLines(
      '---',
      'title: My Task',
      'depends_on:',
      ' - task-a',
      ' - task-b',
      '---',
      'Content here',
    );
    expectParts(
      source,
      'title: My Task\ndepends_on:\n - task-a\n - task-b',
      'Content here',
    );
  });

  // --- Empty frontmatter ---

  it('returns empty data and content for "---\\n---"', () => {
    expectParts('---\n---', '', '');
  });

  it('returns empty data with trailing content after empty frontmatter', () => {
    expectParts('---\n---\nSome content', '', 'Some content');
  });

  // --- No frontmatter ---

  it('returns null when there is no frontmatter at all', () => {
    expectNoFrontmatter('Hello world\nNo frontmatter here');
  });

  it('returns null when file starts with text (no opening ---)', () => {
    expectNoFrontmatter('Some text\n---\nMore text');
  });

  it('returns null when opening --- exists but no closing delimiter', () => {
    expectNoFrontmatter('---\ntitle: Hello\nNo closing delimiter');
  });

  // --- 4+ dashes are NOT delimiters ---

  it('returns null when opening delimiter is 4+ dashes (----)', () => {
    expectNoFrontmatter('----\ntitle: Hello\n----\nContent');
  });

  it('does not treat ---- as a closing delimiter', () => {
    // The only candidate closing line has four dashes, so nothing closes the block.
    expectNoFrontmatter(
      joinLines('---', 'title: Hello', '----', 'Content after four dashes'),
    );
  });

  it('does not treat 5 dashes as a closing delimiter', () => {
    expectNoFrontmatter(joinLines('---', 'title: Hello', '-----', 'Content'));
  });

  // --- Dashes in content body (shouldn't be treated as delimiters) ---

  it('ignores --- in the content body after valid frontmatter', () => {
    const source = joinLines(
      '---',
      'title: Hello',
      '---',
      'Some text',
      '',
      '---',
      'More text',
      '',
    );
    expectParts(source, 'title: Hello', 'Some text\n\n---\nMore text\n');
  });

  it('handles horizontal rule (---) in content', () => {
    const source = joinLines(
      '---',
      'title: Hello',
      '---',
      'Paragraph above',
      '',
      '---',
      '',
      'Paragraph below',
      '',
    );
    expectParts(
      source,
      'title: Hello',
      'Paragraph above\n\n---\n\nParagraph below\n',
    );
  });

  // --- BOM handling ---

  it('handles UTF-8 BOM at start of file', () => {
    expectParts('\uFEFF---\ntitle: Hello\n---\nContent', 'title: Hello', 'Content');
  });

  // --- Edge cases ---

  it('file with only "---\\n---"', () => {
    expectParts('---\n---', '', '');
  });

  it('content body starts after closing ---\\n', () => {
    // Only the body is pinned here; the data part is covered by other cases.
    const parts = splitFrontmatter(
      joinLines('---', 'key: value', '---', 'Body starts here'),
    );
    expect(parts).not.toBeNull();
    expect(parts?.content).toBe('Body starts here');
  });

  it('content is empty string when closing --- is at end of file with no trailing newline', () => {
    expectParts('---\nkey: value\n---', 'key: value', '');
  });

  it('handles leading whitespace on first line before ---', () => {
    expectParts(' ---\ntitle: Hello\n---\nContent', 'title: Hello', 'Content');
  });

  it('returns null if only opening --- with no newline', () => {
    expectNoFrontmatter('---');
  });
});