diff --git a/lab/action-picker/client/x-action-picker-view.ts b/lab/action-picker/client/x-action-picker-view.ts index d28c729c65b42f53ee969e3bdd0750076db42366..b935cdeb7e376ffe076108bf63b2312c41c0587f 100644 --- a/lab/action-picker/client/x-action-picker-view.ts +++ b/lab/action-picker/client/x-action-picker-view.ts @@ -168,7 +168,7 @@ export class XActionPickerView extends ChemistryLitElement { combineLatest([ this._actions.observable, !this.searchProvider || this.searchProvider.debounceTime === 0 - ? this._search.observable.pipe(debounceTime(150)) + ? this._search.observable.pipe(debounceTime(800)) : this._search.observable.pipe(debounceTime(this.searchProvider.debounceTime)), ]) .pipe(takeUntil(this.disconnected)) diff --git a/lab/wiki/api/WikiDraftAPI.ts b/lab/wiki/api/WikiDraftAPI.ts index 22710a51c7eab9525707767671a348ea3f18b04f..ee651c0f38d13cacab9b74b866f8c244c93b504c 100644 --- a/lab/wiki/api/WikiDraftAPI.ts +++ b/lab/wiki/api/WikiDraftAPI.ts @@ -17,6 +17,7 @@ export const wikiApplyDraft = registerMutation({ title: 1, writePermission: 1, readPermission: 1, + parentID: 1, }); if (!draft) { throw new Error('Draft not found'); @@ -31,6 +32,7 @@ export const wikiApplyDraft = registerMutation({ title: 1, writePermission: 1, readPermission: 1, + parentID: 1, }); if (!entry) { throw new Error('Entry not found'); @@ -40,6 +42,7 @@ export const wikiApplyDraft = registerMutation({ entry.title = draft.title; entry.writePermission = draft.writePermission; entry.readPermission = draft.readPermission; + entry.parentID = draft.parentID; await entry.save(); await draft.remove(); diff --git a/lab/wiki/api/WikiSearchAPI.ts b/lab/wiki/api/WikiSearchAPI.ts index 2d5635256fb797bbd095b44af4cb89dc09605e54..370ecbc46e5f89d0523eedf6d8a679a3f1a2c703 100644 --- a/lab/wiki/api/WikiSearchAPI.ts +++ b/lab/wiki/api/WikiSearchAPI.ts @@ -6,14 +6,14 @@ import type { BaseQLSelectionSet } from '@adornis/baseql/utils/queryGeneration.j import { Observable, of } from 'rxjs'; import { WikiEntry } from '../db/WikiEntry.js'; import { WikiChunkedSearchResult, WikiSearchResult } from '../db/WikiSearchResult.js'; -import { extractDataFromID, search } from '../server/redis.js'; +import { extractDataFromID, searchAll } from '../server/redis.js'; export const wikiSearch = registerQuery({ type: () => [WikiSearchResult], operationName: 'wikiSearch', resolve: (query: string) => { return async (gqlFields: BaseQLSelectionSet<WikiSearchResult>) => { - const res = await search(query, 100); + const res = await searchAll(query.toLowerCase(), 100); return await Promise.all( res.map(r => { return new WikiSearchResult({ @@ -43,7 +43,7 @@ export const wikiSearchSubscription = registerSubscription({ subscriber.complete(); return; } - search(query, 100).then(async res => { + searchAll(query.toLowerCase(), 100).then(async res => { const searchResults = res.map(r => { return new WikiSearchResult({ entryID: extractDataFromID(r.id, 'entry-id'), @@ -88,7 +88,7 @@ export const wikiSearchChunkedSubscription = registerSubscription({ subscriber.complete(); return; } - search(query, 100).then(async res => { + searchAll(query.toLowerCase(), 100).then(async res => { let searchResults = res.map(r => { return new WikiSearchResult({ entryID: extractDataFromID(r.id, 'entry-id'), diff --git a/lab/wiki/client/action-picker-provider.ts b/lab/wiki/client/action-picker-provider.ts index 611c27087b0e78c4bc6584b2810ffeb72b5206f6..05c09be9f4bf5f7a52eb5088266fa56927c848bd 100644 --- a/lab/wiki/client/action-picker-provider.ts +++ b/lab/wiki/client/action-picker-provider.ts @@ -77,7 +77,7 @@ function primaryKey(vals: Record<string, string>) { * @returns */ export const createWikiActionPickerProvider = (opts: { - onNavigate: (id: string, jumpTo?: string) => void; + onNavigate: (id: string, jumpTo?: string, highlightTags?: string) => void; scoreModifier?: (search: string, result: WikiSearchResult) => number; minScore?: number; }) => { @@ -100,8 +100,6 @@ export const createWikiActionPickerProvider = (opts: { results.forEach(parent => { if (opts.scoreModifier) { parent.results = parent.results?.map(r => { - console.log(parent.entry?.title, r.score); - r.score = r.score ?? 0; if (opts.scoreModifier) { r.score = opts.scoreModifier(lastSearch, r); @@ -124,48 +122,81 @@ export const createWikiActionPickerProvider = (opts: { } } - const children = Object.values(mapped) + let children = Object.values(mapped) .filter(r => (r.score ?? 0) >= (opts.minScore ?? 0)) .map(r => { const obj = r.metaObject(); const highestHeading = findHighestHeadingKey(obj); const highestAccordion = findHighestAccordionKey(obj); + const tags = obj['tags'] ?? ([] as string[]); + const searchedForTag = tags.includes(lastSearch.toLowerCase()); - let text = obj['summary'] ?? ''; + let text = `${(Math.min(1.0, r.score ?? 0) * 100).toFixed(0)}% • ${obj['summary'] ?? ''}`; // if (text.length > 70) { // text = text.slice(0, 70) + '...'; // } - console.log(highestAccordion, highestHeading, r.text, r.score); - return new Action({ - name: highestAccordion?.text ?? highestHeading?.text ?? r.text ?? 'No title', + name: highestAccordion?.text ?? highestHeading?.text ?? result.entry?.title ?? r.text ?? 'No title', description: text, action: () => { if (highestAccordion) { - opts.onNavigate(result.entryID ?? '', highestAccordion.text); + opts.onNavigate( + result.entryID ?? '', + highestAccordion.text, + searchedForTag ? lastSearch.toLowerCase() : undefined, + ); } else if (highestHeading) { - opts.onNavigate(result.entryID ?? '', highestHeading.text); + opts.onNavigate( + result.entryID ?? '', + highestHeading.text, + searchedForTag ? lastSearch.toLowerCase() : undefined, + ); } else { - opts.onNavigate(result.entryID ?? ''); + opts.onNavigate( + result.entryID ?? '', + undefined, + searchedForTag ? lastSearch.toLowerCase() : undefined, + ); } }, }); }); - return new Action({ - name: result.entry?.title ?? 'No title', - description: `${children?.length ?? 0} results`, - sideAction: { - title: 'Zur Seite', - icon: 'link', - action: () => { - opts.onNavigate(result.entryID ?? ''); + if (children.length == 0) { + return null; + } + + // deduplicate by name and description + const unique: Record<string, Action> = {}; + for (const c of children) { + const key = c.name + c.description; + if (!unique[key]) { + unique[key] = c; + } + } + + children = Object.values(unique); + + return { + score: result.results?.[0]?.score ?? 0, + action: new Action({ + name: result.entry?.title ?? 'No title', + description: `${children?.length ?? 0} ergebnisse • ${( + Math.min(1.0, result.results?.[0]?.score ?? 0) * 100 + ).toFixed(0)}% übereinstimmung`, + sideAction: { + title: 'Zur Seite', + icon: 'link', + action: () => { + opts.onNavigate(result.entryID ?? ''); + }, }, - }, - children: children, - }); - }); + children: children, + }), + }; + }) + .map(a => a?.action) as Action[]; }, ); }; diff --git a/lab/wiki/client/x-wiki-splitter-view.ts b/lab/wiki/client/x-wiki-splitter-view.ts index 0648820c1aee4ef1ba2726da9221d45ad2908c69..24fe8baab543a35865d57d2f6ec378a32fb4281d 100644 --- a/lab/wiki/client/x-wiki-splitter-view.ts +++ b/lab/wiki/client/x-wiki-splitter-view.ts @@ -30,7 +30,7 @@ export class XWikiSplitterView extends ChemistryLitElement { <x-text bold>Chunk ${i + 1}</x-text> </x-flex> <x-flex padding="sm"> - <x-text>${c.text}</x-text> + <x-text ${css({ whiteSpace: 'pre-wrap' })}>${c.text}</x-text> </x-flex> <x-flex padding="sm" ${css({ borderTop: '1px solid rgba(0,0,0,0.1)' })}> <x-text ${css({ fontFamily: 'monospace' })}> ${JSON.stringify(c.meta)}</x-text> diff --git a/lab/wiki/client/x-wiki-view.ts b/lab/wiki/client/x-wiki-view.ts index 4631286be8f94a08d97adffedbdea2a2900cdd7f..a0979aaa13ba0f579a0eb88be43f8b9938aba5a6 100644 --- a/lab/wiki/client/x-wiki-view.ts +++ b/lab/wiki/client/x-wiki-view.ts @@ -111,7 +111,7 @@ export class XWikiView extends ChemistryLitElement { /** * URL search parameters, used for jump-to functionality */ - @state() _params = new RXController( + @state() _jumpTo = new RXController( this, routingState.searchParams.get('jumpTo').pipe( tap(updated => { @@ -120,6 +120,18 @@ export class XWikiView extends ChemistryLitElement { ), ); + /** + * URL search parameters, used for highlight-tags functionality + */ + @state() _highlightTags = new RXController( + this, + routingState.searchParams.get('highlightTags').pipe( + tap(updated => { + this.requestUpdate(); + }), + ), + ); + /** * Reference to the content container element for scroll functionality */ @@ -133,7 +145,7 @@ export class XWikiView extends ChemistryLitElement { override updated(prop) { super.updated(prop); - if (this._params.value && this._contentRef.value) { + if (this._jumpTo.value && this._contentRef.value) { // * Remove the selected class from all elements queryAll('.wiki-search-selected').forEach(el => { el.classList.remove('wiki-search-selected'); @@ -141,10 +153,10 @@ export class XWikiView extends ChemistryLitElement { // * Find the matching element where the content is located let node = queryAll('*', this._contentRef.value).find(el => { - if (!this._params.value) return false; + if (!this._jumpTo.value) return false; if (!isChildOfAccordionOrHeader(el)) return false; if (el instanceof HTMLElement) { - return el.innerText.trim() === this._params.value.trim(); + return el.innerText.trim() === this._jumpTo.value.trim(); } return false; }) as Maybe<HTMLElement>; @@ -188,6 +200,31 @@ export class XWikiView extends ChemistryLitElement { } } } + + if (this._highlightTags.value && this._contentRef.value) { + queryAll('node-tag', this._contentRef.value).forEach(el => { + if (el.textContent?.toLowerCase().includes(this._highlightTags.value?.toLowerCase() ?? '')) { + el.classList.add('wiki-search-selected'); + } + + // * Now we need to travers the DOM back up and call .open() on all node-accordeon elements + let parent = el.parentElement; + let toOpen: Element[] = []; + while (parent) { + if (parent.nodeName === 'NODE-ACCORDEON') { + toOpen.push(parent); + } + parent = parent.parentElement; + } + + // * Open from outer to inner + toOpen.toReversed().forEach((el, i) => { + setTimeout(() => { + (el as any).open(); + }, i * 150); + }); + }); + } } /** diff --git a/lab/wiki/html-text-splitter.ts b/lab/wiki/html-text-splitter.ts index 125afb921d19df2d6ec6cb2e761da2e03f985182..779ce1b4c061d25e3af25159a7d65149e4377a8a 100644 --- a/lab/wiki/html-text-splitter.ts +++ b/lab/wiki/html-text-splitter.ts @@ -31,7 +31,7 @@ interface MetaDataSpan { export interface TextChunk { text: string; - meta: Record<string, string>; + meta: Record<string, string | string[]>; } /** @@ -56,6 +56,19 @@ function hasHeaderAsChild(node: Node | null | undefined) { return false; } +function hasNodeGridCellAsChild(node: Node | null | undefined) { + if (!node) return false; + for (let i = 0; i < node.childNodes.length; i++) { + if (node.childNodes[i]?.nodeName === 'NODE-GRID-CELL') { + return true; + } + if (hasNodeGridCellAsChild(node.childNodes[i])) { + return true; + } + } + return false; +} + /** * This function takes an array of strings and header objects, and returns a string that is the concatenation of all strings, * and an array of spans where each span is an object with start and end indices in the fullText string and a meta object @@ -81,7 +94,7 @@ function creatSpansAndFullText(fragments: Array<Header | Accordion | string>): { for (let i = 0; i < fragments.length; i++) { const fragment = fragments[i]; if (typeof fragment === 'string') { - fullText += fragment; + fullText += fragment + ' '; } else if (typeof fragment === 'object') { const lastSpan = spans[spans.length - 1]; if (lastSpan && lastSpan.end === -1) { @@ -207,7 +220,10 @@ async function chunkSpanBased( chunks.push(newChunk); } - return chunks; + return chunks.map(c => { + c.meta['tags'] = [...new Set(c.text.match(/#(\w+)/g)?.map(t => t.replace('#', '').toLowerCase()) ?? [])]; + return c; + }); } /** @@ -228,7 +244,6 @@ export async function splitHtml( // * walk all nodes. If a node doesn't contain a header as a child, // * we can collapse it to a single string. - const headers: Header[] = []; const fragments: Array<Header | Accordion | string> = []; let accordionLevel = 0; @@ -238,12 +253,13 @@ export async function splitHtml( const hTags = ['H1', 'H2', 'H3', 'H4', 'H5', 'H6']; for (let i = 0; i < hTags.length; i++) { if (node.nodeName.includes(hTags[i] ?? 'H1')) { - headers.push({ level: i + 1, text: node.textContent ?? '' }); - fragments.push({ level: i + 1, text: node.textContent ?? '' }); + fragments.push({ level: i + 1, text: (node.textContent ?? '').trim() }); + fragments.push('\n\n' + '#'.repeat(i + 1) + ' ' + (node.textContent ?? '') + '\n\n'); + return; } } - if (!hasHeaderAsChild(node)) { + if (!hasHeaderAsChild(node) && !hasNodeGridCellAsChild(node)) { fragments.push(node.textContent ?? ''); return; } @@ -260,6 +276,11 @@ export async function splitHtml( } } + if (node.nodeName === 'NODE-GRID-CELL') { + fragments.push('\n\n' + (node.textContent ?? '') + '\n\n'); + return; + } + for (let i = 0; i < node.childNodes.length; i++) { walk(node.childNodes[i]); } diff --git a/lab/wiki/print-components/x-wiki-print.ts b/lab/wiki/print-components/x-wiki-print.ts index cc79bb649fcf420b594bcf91b016a9a6c2c1e00e..99e9e0434302d57523591ce27254a7942526d6f0 100644 --- a/lab/wiki/print-components/x-wiki-print.ts +++ b/lab/wiki/print-components/x-wiki-print.ts @@ -1,21 +1,19 @@ import { getByID } from '@adornis/baseql/operations/mongo.js'; import { ChemistryLitElement } from '@adornis/chemistry/chemistry-lit-element.js'; import { RXController } from '@adornis/chemistry/controllers/RXController.js'; +import '@adornis/chemistry/elements/components/x-din-paper.js'; import { DesignSystem } from '@adornis/chemistry/elements/theming/design.js'; +import { xComponents } from '@adornis/chemistry/elements/x-components.js'; +import '@adornis/html-based-buildify/client/prosemirror-editor.js'; import { finalizePDF } from '@adornis/print/client/finalize-pdf.js'; import { routingState } from '@adornis/router/client/routing-state.js'; import { html } from 'lit'; import { customElement, property, state } from 'lit/decorators.js'; +import { createRef, ref, type Ref } from 'lit/directives/ref.js'; import { unsafeHTML } from 'lit/directives/unsafe-html.js'; import { combineLatest, filter, firstValueFrom, map, switchMap, tap, timer } from 'rxjs'; -import { WikiEntry } from '../db/WikiEntry.js'; -import { xComponents } from '@adornis/chemistry/elements/x-components.js'; -import { createRef, ref, type Ref } from 'lit/directives/ref.js'; - -// Components -import '@adornis/html-based-buildify/client/prosemirror-editor.js'; -import '@adornis/chemistry/elements/components/x-din-paper.js'; import { queryAll } from '../client/query-shadow-root.js'; +import { WikiEntry } from '../db/WikiEntry.js'; @customElement('x-wiki-print') export class XWikiPrint extends ChemistryLitElement { @@ -44,22 +42,29 @@ export class XWikiPrint extends ChemistryLitElement { contentRef: Ref<HTMLInputElement> = createRef(); protected async finalize() { + let done = false; + // * Wait for all images to load timer(500).subscribe(() => { + if (done) return; + if (this.contentRef.value) { const found = queryAll(`x-image, ${DesignSystem.prefix}-image`); + if (found.length === 0) { + done = true; finalizePDF(); - } else { - if (found.every(f => (f as any).loaded)) { - finalizePDF(); - } + } else if (found.every(f => (f as any).loaded)) { + done = true; + setTimeout(() => finalizePDF(), 500); } } }); // * Fallback after 20s await firstValueFrom(timer(20000)); + if (done) return; + finalizePDF(); } @@ -72,9 +77,9 @@ export class XWikiPrint extends ChemistryLitElement { const designSystemPrefix = DesignSystem.prefix; return xComponents( html`<x-din-paper> - <x-flex ${ref(this.contentRef)} - >${unsafeHTML(content.replace(/<([\/]{0,1})x-/g, `<$1${designSystemPrefix}-`))}</x-flex - > + <x-flex ${ref(this.contentRef)}> + ${unsafeHTML(content.replace(/<([\/]{0,1})x-/g, `<$1${designSystemPrefix}-`))} + </x-flex> </x-din-paper> `, ); } diff --git a/lab/wiki/server/redis.ts b/lab/wiki/server/redis.ts index 7ac0ec4c79345d04a33fd48d2d32efef6ae57708..13232fd9eede192153e412de67d8932bb8ce65e8 100644 --- a/lab/wiki/server/redis.ts +++ b/lab/wiki/server/redis.ts @@ -9,6 +9,9 @@ import { SchemaFieldTypes, VectorAlgorithms, createClient } from 'redis'; import { WikiEntry } from '../db/WikiEntry.js'; import { splitHtml, type TextChunk } from '../html-text-splitter.js'; +/** API endpoint for Cohere */ +const RERANK_API = 'https://api.cohere.com/v2/rerank'; + /** Prefix for wiki entry keys in Redis */ export const ENTRY_KEY_PREFIX = 'adornis-wiki-entry'; @@ -21,6 +24,7 @@ export const MAX_CONTEXT = 8191; /** Configuration for Redis */ const redisConfig = new Config('redis-config', { REDIS_URL: { env: true, name: 'REDIS_URL', client: false, required: true }, + RERANK_API_KEY: { env: true, name: 'RERANK_API_KEY', client: false, required: false, defaultVal: '' }, }); /** Batch size for processing embeddings */ @@ -75,7 +79,7 @@ export const REDIS_CLIENT = RedisManager.getInstance(); // Initialize the client REDIS_CLIENT.init() - .then(createRedisIndex) + .then(() => createRedisIndex()) .catch(err => logger.error('Failed to initialize Redis client', err)); // Shutdown the client @@ -87,13 +91,20 @@ addGlobalHook('cleanup', () => { * Creates the Redis index for wiki entries * @async */ -export async function createRedisIndex() { +export async function createRedisIndex(dropOldIndex?: boolean) { const client = REDIS_CLIENT.getClient(); // Check if index exists try { await client.ft.info(ENTRY_INDEX_KEY); logger.info('Redis index already exists'); + + if (dropOldIndex) { + await client.ft.dropIndex(ENTRY_INDEX_KEY); + logger.info('Redis index dropped successfully'); + throw new Error('Redis index dropped successfully'); + } + return; } catch (err) { // Index doesn't exist, create it @@ -110,6 +121,11 @@ export async function createRedisIndex() { BLOCK_SIZE: 111, AS: 'embedding', }, + '$.text': { + // Add this field + type: SchemaFieldTypes.TEXT, + AS: 'text', + }, }, { ON: 'JSON', @@ -154,6 +170,18 @@ export function setSelectedChunkTransformer(transformer: ChunkTransformer) { selectedChunkTransformer = transformer; } +function metaToString(meta: Record<string, string | string[]>) { + return Object.entries(meta) + .map(([k, v]) => { + if (typeof v === 'string') { + return v && v.trim().length > 0 ? `${k}: ${v}` : `${k}`; + } else { + return v && v.length > 0 ? `${k}: ${v.join(', ')}` : `${k}`; + } + }) + .join(' '); +} + /** * Simplest chunk transformer that just returns the text of the chunk with some meta data * @param chunk The chunk @@ -161,15 +189,18 @@ export function setSelectedChunkTransformer(transformer: ChunkTransformer) { * @returns The chunk text */ export function defaultChunkTransformer(chunk: TextChunk, entry: EntityData<WikiEntry>) { - return { - text: `# ${entry.title ?? ''} + const fullText = `# ${entry.title ?? ''} - ${chunk.text} +${chunk.text} - ${Object.entries(chunk.meta) - .map(([k, v]) => `${k}: ${v}`) - .join(' ')}`, - meta: {}, +${metaToString(chunk.meta)}`; + return { + text: fullText, + meta: { + fullText, + context: '', + summary: '', + }, }; } @@ -183,7 +214,7 @@ export function stripHtml(html: string): string { // Remove HTML tags and decode HTML entities return html - .replace(/<[^>]*>/g, '') // Remove HTML tags + .replace(/<[^>]*>/g, ' ') // Remove HTML tags .replace(/ /g, ' ') // Replace with space .replace(/&/g, '&') // Replace & with & .replace(/</g, '<') // Replace < with < @@ -240,18 +271,20 @@ ${context} ], }); - return { - text: `# ${entry.title ?? ''} + const fullText = `# ${entry.title ?? ''} - ${context} +${context} + +${chunk.text} - ${chunk.text} +${metaToString(chunk.meta)}`; - ${Object.entries(chunk.meta) - .map(([k, v]) => `${k}: ${v}`) - .join(' ')}`, + return { + text: fullText, meta: { + fullText, summary, + context, }, }; } @@ -451,6 +484,75 @@ export async function search(search: string, limit: number = 5) { return results.documents; } +export async function searchFullText(search: string[], limit: number = 5) { + const client = REDIS_CLIENT.getClient(); + const searchQuery = search + .map(term => { + // Escape special characters in the entire term + const escapedTerm = term.replace(/[\\"`]/g, '\\$&').trim(); + // Use simple text matching + return `${escapedTerm}`; + }) + .join(' | '); // Join terms with OR operator + + const finalQuery = `@text:(${searchQuery})`; + + const results = await client.ft.search(ENTRY_INDEX_KEY, finalQuery, { + LIMIT: { + from: 0, + size: limit, + }, + RETURN: ['$.text', '$.meta'], + }); + return results.documents.map(s => { + s.value['score'] = 1.0; + return s; + }); +} + +export async function searchAll(query: string, limit: number = 5) { + if (query.length < 3) { + return []; + } + + const results = [searchFullText([query], limit), search(query, limit)]; + + const [fullTextResults, redisResults] = await Promise.all(results); + const allResults = [...(fullTextResults ?? []), ...(redisResults ?? [])]; + + const key = redisConfig.get('RERANK_API_KEY'); + if (key === '') { + return allResults.sort((a: any, b: any) => (b['value']?.['score'] ?? 0) - (a['value']?.['score'] ?? 0)); + } + + const response = await fetch(RERANK_API, { + body: JSON.stringify({ + documents: allResults.map(r => { + return JSON.parse((r.value['$.meta'] ?? '{}') as string).fullText; + }), + query: query, + model: 'rerank-v3.5', + }), + headers: { + Authorization: `Bearer ${key}`, + 'Content-Type': 'application/json', + }, + method: 'POST', + }); + + const responseJson = (await response.json()) as { + results: { index: number; relevance_score: number }[]; + }; + + for (const result of responseJson.results) { + allResults[result.index]!.value['score'] = result.relevance_score; + } + + console.log(responseJson); + + return allResults.sort((a: any, b: any) => (b['value']?.['score'] ?? 0) - (a['value']?.['score'] ?? 0)); +} + /** * Extracts data from an ID * @param id The id to extract data from @@ -470,6 +572,8 @@ export function extractDataFromID(id: string, dataType: 'entry-id' | 'prefix') { * Forces a re-embedding of all wiki entries */ export async function forceReEmbedding() { + await createRedisIndex(true); + const rawWikiCollection = await getRawCollection<EntityData<WikiEntry>>(WikiEntry._collectionName); await rawWikiCollection.updateMany( {