From 19220010c1d2ffdd57f4e07f14cd02928fb5cdfa Mon Sep 17 00:00:00 2001 From: "me@rodrigorodriguez.com" Date: Sun, 27 Oct 2024 13:07:05 -0300 Subject: [PATCH] new(all): Improvements in IPC and MainWindow. --- dist/components/App.js | 12 +- dist/main/main.js | 251 ++++++++++++++++++++---- dist/preload/preload.js | 11 ++ dist/services/openai.service.js | 2 +- dist/services/player.service.js | 106 +++++----- dist/services/recorder.service.js | 281 ++++++++++++--------------- src/components/App.tsx | 18 +- src/main/main.ts | 306 ++++++++++++++++++++++++----- src/preload/preload.ts | 14 ++ src/renderer/index.html | 2 +- src/services/openai.service.ts | 2 +- src/services/player.service.ts | 125 ++++++------ src/services/recorder.service.ts | 308 +++++++++++++----------------- src/services/types.ts | 26 +++ 14 files changed, 927 insertions(+), 537 deletions(-) diff --git a/dist/components/App.js b/dist/components/App.js index 5128888..4467674 100644 --- a/dist/components/App.js +++ b/dist/components/App.js @@ -55,13 +55,15 @@ const App = () => { console.error('Playback error:', error); } }; - return (react_1.default.createElement("div", { className: "p-4" }, - react_1.default.createElement("h1", { className: "text-2xl font-bold mb-4" }, "BotDesktop Automation"), - react_1.default.createElement("div", { className: "space-x-4 mb-4" }, + return (react_1.default.createElement("div", { className: "p-4 h-auto" }, + react_1.default.createElement("h1", { className: "text-2xl font-bold mb-4" }, "General Bots Desktop"), + react_1.default.createElement("div", { className: "space-x-4 mb-4 h-auto" }, react_1.default.createElement("button", { className: `px-4 py-2 rounded ${recording ? 'bg-red-500' : 'bg-blue-500'} text-white`, onClick: recording ? handleStopRecording : handleStartRecording }, recording ? 'Stop Recording' : 'Start Recording'), react_1.default.createElement("button", { className: "px-4 py-2 rounded bg-green-500 text-white", onClick: handlePlayback, disabled: !basicCode }, "Play Recording")), - react_1.default.createElement("div", { className: "mt-4" }, + react_1.default.createElement("div", { className: "mt-4 h-20" }, react_1.default.createElement("h2", { className: "text-xl font-bold mb-2" }, "Generated BASIC Code:"), - react_1.default.createElement("pre", { className: "bg-gray-100 p-2 rounded border" }, basicCode)))); + react_1.default.createElement("pre", { className: "h-20 min-h-100 bg-gray-100 p-2 rounded border" }, basicCode)), + react_1.default.createElement("div", { className: "mb-4" }, + react_1.default.createElement("a", { href: "https://github.com/General Bots" }, "General Bots")))); }; exports.default = App; diff --git a/dist/main/main.js b/dist/main/main.js index 517e3d2..460f5e7 100644 --- a/dist/main/main.js +++ b/dist/main/main.js @@ -23,20 +23,35 @@ var __importStar = (this && this.__importStar) || function (mod) { return result; }; Object.defineProperty(exports, "__esModule", { value: true }); +exports.cleanupAudioCapture = cleanupAudioCapture; require('dotenv').config(); require('electron-require'); const electron_1 = require("electron"); -const path = __importStar(require("path")); const electron_2 = require("electron"); +const path = __importStar(require("path")); +const electron_3 = require("electron"); const recorder_service_1 = require("../services/recorder.service"); const player_service_1 = require("../services/player.service"); +const audioCapture = { + mediaRecorder: null, + audioStream: null, + analyserNode: null, + audioData: null, + isCapturing: false +}; const recorder = new recorder_service_1.RecorderService(); const player = new player_service_1.PlayerService(); function createWindow() { - const mainWindow = new electron_1.BrowserWindow({ - width: 1200, - height: 800, + const mainWindow = new electron_2.BrowserWindow({ + width: 700, + height: 500, + backgroundColor: "grey", + center: true, + maximizable: false, + thickFrame: true, + autoHideMenuBar: true, webPreferences: { + experimentalFeatures: true, nodeIntegrationInWorker: true, nodeIntegration: true, nodeIntegrationInSubFrames: true, @@ -44,6 +59,8 @@ function createWindow() { preload: path.join(__dirname, '../preload/preload.js') } }); + mainWindow.setAutoHideMenuBar(true); + mainWindow.setMaximizable(false); if (process.env.NODE_ENV === 'development') { mainWindow.loadURL('http://localhost:8080'); mainWindow.webContents.openDevTools(); @@ -51,6 +68,76 @@ function createWindow() { else { mainWindow.loadFile(path.join(__dirname, '../../src/renderer/index.html')); } + electron_2.ipcMain.handle('mouse-event', recorder.handleMouseEvent.bind(recorder)); + electron_2.ipcMain.handle('keyboard-event', recorder.handleKeyboardEvent.bind(recorder)); + // Handler to capture the entire screen + electron_2.ipcMain.handle('get-screenshot', async () => { + console.log('get-screenshot called'); + const sources = await electron_2.desktopCapturer.getSources({ types: ['screen'] }); + const screenSource = sources[0]; // Get the first screen source + const { thumbnail } = screenSource; // Thumbnail is a native image + return thumbnail.toPNG(); // Return the screenshot as PNG buffer + }); + electron_2.ipcMain.handle('start-recording', async () => { + console.log('start-recording called'); + await recorder.startRecording(); + }); + electron_2.ipcMain.handle('stop-recording', async () => { + console.log('stop-recording called'); + return await recorder.stopRecording(); + }); + electron_2.ipcMain.handle('execute-basic-code', async (_, code) => { + console.log('execute-basic-code called with:', code); + await player.executeBasicCode(code); + }); + electron_2.ipcMain.handle('check-microphone-permission', async () => { + console.log('check-microphone-permission called'); + if (process.platform === 'darwin') { + const status = await electron_3.systemPreferences.getMediaAccessStatus('microphone'); + if (status !== 'granted') { + const success = await electron_3.systemPreferences.askForMediaAccess('microphone'); + return success; + } + return true; + } + return true; // On Windows/Linux, permissions are handled by the OS + }); + electron_2.ipcMain.handle('start-microphone-capture', async (event) => { + debugger; + const window = electron_2.BrowserWindow.fromWebContents(event.sender); + if (!window) { + throw new Error('No window found for this request'); + } + return startMicrophoneCapture(window); + }); + electron_2.ipcMain.handle('stop-microphone-capture', async (event) => { + const window = electron_2.BrowserWindow.fromWebContents(event.sender); + if (!window) { + throw new Error('No window found for this request'); + } + return stopMicrophoneCapture(window); + }); + electron_2.ipcMain.handle('start-microphone-capture', async (event, ...args) => { + // Perform asynchronous microphone capture logic here + try { + const result = await startMicrophoneCapture(args[0]); // Assuming this function is async + return result; + } + catch (error) { + console.error("Error during microphone capture:", error); + throw error; // Send the error back to the renderer + } + }); + electron_2.ipcMain.handle('stop-microphone-capture', async (event, ...args) => { + try { + const result = await stopMicrophoneCapture(args[0]); + return result; + } + catch (error) { + console.error("Error stopping microphone capture:", error); + throw error; // Send the error back to the renderer + } + }); } electron_1.app.whenReady().then(createWindow); electron_1.app.on('window-all-closed', () => { @@ -59,44 +146,128 @@ electron_1.app.on('window-all-closed', () => { } }); electron_1.app.on('activate', () => { - if (electron_1.BrowserWindow.getAllWindows().length === 0) { + if (electron_2.BrowserWindow.getAllWindows().length === 0) { createWindow(); } }); -electron_1.ipcMain.handle('mouse-event', recorder.mouseHandleEvent.bind(recorder)); -electron_1.ipcMain.handle('keyboard-event', recorder.keyboardHandleEvent.bind(recorder)); -electron_1.ipcMain.handle('screenshot-captured', recorder.screenshotHandleEvent.bind(recorder)); -// Handler to capture the entire screen -electron_1.ipcMain.handle('get-screenshot', async () => { - console.log('get-screenshot called'); - const sources = await electron_1.desktopCapturer.getSources({ types: ['screen'] }); - const screenSource = sources[0]; // Get the first screen source - const { thumbnail } = screenSource; // Thumbnail is a native image - return thumbnail.toPNG(); // Return the screenshot as PNG buffer -}); -electron_1.ipcMain.handle('start-recording', async () => { - console.log('start-recording called'); - await recorder.startRecording(); -}); -electron_1.ipcMain.handle('stop-recording', async () => { - console.log('stop-recording called'); - return await recorder.stopRecording(); -}); -electron_1.ipcMain.handle('execute-basic-code', async (_, code) => { - console.log('execute-basic-code called with:', code); - await player.executeBasicCode(code); -}); -electron_1.ipcMain.handle('check-microphone-permission', async () => { - console.log('check-microphone-permission called'); - if (process.platform === 'darwin') { - const status = await electron_2.systemPreferences.getMediaAccessStatus('microphone'); - if (status !== 'granted') { - const success = await electron_2.systemPreferences.askForMediaAccess('microphone'); - return success; - } - return true; - } - return true; // On Windows/Linux, permissions are handled by the OS -}); // Enable required permissions electron_1.app.commandLine.appendSwitch('enable-speech-dispatcher'); +// Register cleanup on app quit +electron_1.app.on('will-quit', cleanupAudioCapture); +// Function to get the focused window or first available window +function getFocusedWindow() { + const focusedWindow = electron_2.BrowserWindow.getFocusedWindow(); + if (focusedWindow) + return focusedWindow; + const windows = electron_2.BrowserWindow.getAllWindows(); + return windows.length > 0 ? windows[0] : null; +} +// Function to safely send to window +function sendToWindow(channel, ...args) { + const window = getFocusedWindow(); + if (window && !window.isDestroyed()) { + window.webContents.send(channel, ...args); + } +} +async function startMicrophoneCapture(window) { + console.log('Starting microphone capture...'); + try { + // Request microphone access + //@ts-ignore + const stream = await window.myApi.startMicrophone(); + audioCapture.audioStream = stream; + // Set up audio analysis + const audioContext = new (window.AudioContext || window.webkitAudioContext)(); + const sourceNode = audioContext.createMediaStreamSource(stream); + audioCapture.analyserNode = audioContext.createAnalyser(); + audioCapture.analyserNode.fftSize = 2048; + sourceNode.connect(audioCapture.analyserNode); + audioCapture.audioData = new Uint8Array(audioCapture.analyserNode.frequencyBinCount); + // Set up MediaRecorder + audioCapture.mediaRecorder = new MediaRecorder(stream, { + mimeType: 'audio/webm;codecs=opus' + }); + // Handle audio data + audioCapture.mediaRecorder.ondataavailable = (event) => { + if (event.data.size > 0 && !window.isDestroyed()) { + // Convert blob to buffer and send to renderer + const reader = new FileReader(); + reader.onloadend = () => { + const buffer = Buffer.from(reader.result); + window.webContents.send('audio-chunk', buffer); + }; + reader.readAsArrayBuffer(event.data); + } + }; + // Start recording + audioCapture.mediaRecorder.start(1000); // Capture in 1-second chunks + audioCapture.isCapturing = true; + // Start audio level monitoring + monitorAudioLevels(window); + console.log('Microphone capture started successfully'); + } + catch (error) { + console.error('Failed to start microphone capture:', error); + throw error; + } +} +function monitorAudioLevels(window) { + if (!audioCapture.isCapturing || !audioCapture.analyserNode || !audioCapture.audioData || window.isDestroyed()) { + return; + } + // Get audio level data + audioCapture.analyserNode.getByteFrequencyData(audioCapture.audioData); + // Calculate average volume level (0-1) + const average = audioCapture.audioData.reduce((acc, value) => acc + value, 0) / + audioCapture.audioData.length / + 255; + // Send level to renderer + if (!window.isDestroyed()) { + window.webContents.send('audio-level', average); + } + // Continue monitoring + requestAnimationFrame(() => monitorAudioLevels(window)); +} +function stopMicrophoneCapture(window) { + console.log('Stopping microphone capture...'); + try { + if (audioCapture.mediaRecorder && audioCapture.mediaRecorder.state !== 'inactive') { + audioCapture.mediaRecorder.stop(); + } + if (audioCapture.audioStream) { + audioCapture.audioStream.getTracks().forEach(track => track.stop()); + } + if (audioCapture.analyserNode) { + audioCapture.analyserNode.disconnect(); + } + audioCapture.isCapturing = false; + audioCapture.mediaRecorder = null; + audioCapture.audioStream = null; + audioCapture.analyserNode = null; + audioCapture.audioData = null; + if (!window.isDestroyed()) { + window.webContents.send('microphone-stopped'); + } + console.log('Microphone capture stopped successfully'); + } + catch (error) { + console.error('Failed to stop microphone capture:', error); + throw error; + } +} +// Error handler for audio processing +function handleAudioError(error, window) { + console.error('Audio processing error:', error); + stopMicrophoneCapture(window); + // Notify renderer of error if window still exists + if (!window.isDestroyed()) { + window.webContents.send('audio-error', error.message); + } +} +// Clean up resources when app is closing +function cleanupAudioCapture() { + const window = getFocusedWindow(); + if (window) { + stopMicrophoneCapture(window); + } +} diff --git a/dist/preload/preload.js b/dist/preload/preload.js index 158bcee..e718dda 100644 --- a/dist/preload/preload.js +++ b/dist/preload/preload.js @@ -1,6 +1,17 @@ const { ipcRenderer } = require('electron'); +const { contextBridge } = require('electron'); +const audioContext = new (window.AudioContext || window.webkitAudioContext)(); //@ts-nocheck window.myApi = { + startMicrophone: () => { + alert(1); + if (navigator.mediaDevices) { + return navigator.mediaDevices.getUserMedia({ audio: true }); + } + else { + console.error("MediaDevices API not supported"); + } + }, sendMessage: (message) => { console.log('[preload] sendMessage called with:', message); return ipcRenderer.send('message-from-renderer', message); diff --git a/dist/services/openai.service.js b/dist/services/openai.service.js index ddd4ff2..2b5df05 100644 --- a/dist/services/openai.service.js +++ b/dist/services/openai.service.js @@ -51,7 +51,7 @@ class OpenAIService { role: 'system', content: `You are an AI that analyzes screenshots and voice commands to determine user intentions for automation. You should identify UI elements and return specific actions in JSON format. - Focus on the area near the cursor position when relevant.` + Focus on the area near the field ${context.identifier}.` }, { role: 'user', diff --git a/dist/services/player.service.js b/dist/services/player.service.js index a795a94..b348cad 100644 --- a/dist/services/player.service.js +++ b/dist/services/player.service.js @@ -2,78 +2,86 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.PlayerService = void 0; const electron_1 = require("electron"); -const openai_service_1 = require("./openai.service"); +const openai_service_1 = require("../services/openai.service"); class PlayerService { constructor() { + this.currentScreenshot = ''; + this.isPlaying = false; console.log('[PlayerService] Initializing'); this.openAIService = new openai_service_1.OpenAIService(); } async executeBasicCode(code) { console.log('[PlayerService] executeBasicCode called with:', code); + this.isPlaying = true; const lines = code.split('\n'); - for (const line of lines) { - if (line.trim().startsWith('REM') || line.trim() === '') - continue; - const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/); - if (!match) - continue; - const [_, command, identifier, value] = match; - console.log('[PlayerService] Executing command:', { command, identifier, value }); - await this.executeCommand(command, identifier, value); - await new Promise(resolve => setTimeout(resolve, 500)); + try { + for (const line of lines) { + if (!this.isPlaying) + break; + if (line.trim().startsWith('REM') || line.trim() === '') + continue; + const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/); + if (!match) + continue; + const [_, command, identifier, value] = match; + console.log('[PlayerService] Executing command:', { command, identifier, value }); + await this.captureAndAnalyzeScreen(); + await this.executeCommand(command, identifier, value); + await new Promise(resolve => setTimeout(resolve, 500)); + } } + catch (error) { + console.error('[PlayerService] Execution error:', error); + this.isPlaying = false; + throw error; + } + } + async captureAndAnalyzeScreen() { + console.log('[PlayerService] captureAndAnalyzeScreen called'); + const sources = await electron_1.ipcRenderer.invoke('get-screenshot'); + this.currentScreenshot = sources[0].thumbnail; } async executeCommand(command, identifier, value) { console.log('[PlayerService] executeCommand called with:', { command, identifier, value }); - const screenshotPath = await this.captureScreen(); - console.log('[PlayerService] Screen captured at:', screenshotPath); - const analysis = await this.openAIService.analyzeScreen(screenshotPath); - const element = analysis.elements.find(e => e.identifier === identifier); - if (!element) - throw new Error(`Element not found: ${identifier}`); + const element = await this.openAIService.analyzeScreenWithContext({ + screenshot: this.currentScreenshot, + transcription: '', + identifier, cursorPosition: null + }); + //@ts-nocheck + if (!element) { + console.warn(`[PlayerService] Element not found: ${identifier}, retrying with fresh analysis`); + await this.captureAndAnalyzeScreen(); + const newElement = await this.openAIService.analyzeScreenWithContext({ + screenshot: this.currentScreenshot, + transcription: '', + cursorPosition: await electron_1.ipcRenderer.invoke('get-cursor-position'), + identifier + }); + if (!newElement) + throw new Error(`Element not found after retry: ${identifier}`); + } const centerX = element.bounds.x + element.bounds.width / 2; const centerY = element.bounds.y + element.bounds.height / 2; switch (command) { case 'CLICK': console.log('[PlayerService] Simulating click at:', { centerX, centerY }); - await this.simulateClick(centerX, centerY); + await electron_1.ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY }); break; case 'TYPE': console.log('[PlayerService] Simulating type:', { centerX, centerY, value }); - await this.simulateClick(centerX, centerY); - await this.simulateTyping(value || ''); + await electron_1.ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY }); + await electron_1.ipcRenderer.invoke('simulate-type', { text: value || '' }); + break; + case 'MOVE': + console.log('[PlayerService] Simulating move:', { centerX, centerY }); + await electron_1.ipcRenderer.invoke('simulate-move', { x: centerX, y: centerY }); break; } } - async captureScreen() { - console.log('[PlayerService] captureScreen called'); - return new Promise((resolve, reject) => { - electron_1.ipcMain.once('screen-captured', (_, screenshotPath) => { - console.log('[PlayerService] Screen captured event received:', screenshotPath); - resolve(screenshotPath); - }); - electron_1.ipcMain.emit('capture-screen'); - }); - } - async simulateClick(x, y) { - console.log('[PlayerService] simulateClick called with:', { x, y }); - return new Promise((resolve) => { - electron_1.ipcMain.once('click-completed', () => { - console.log('[PlayerService] Click completed'); - resolve(); - }); - electron_1.ipcMain.emit('simulate-click', { x, y }); - }); - } - async simulateTyping(text) { - console.log('[PlayerService] simulateTyping called with:', text); - return new Promise((resolve) => { - electron_1.ipcMain.once('typing-completed', () => { - console.log('[PlayerService] Typing completed'); - resolve(); - }); - electron_1.ipcMain.emit('simulate-typing', { text }); - }); + stop() { + console.log('[PlayerService] Stopping playback'); + this.isPlaying = false; } } exports.PlayerService = PlayerService; diff --git a/dist/services/recorder.service.js b/dist/services/recorder.service.js index 0ac1c5c..263398e 100644 --- a/dist/services/recorder.service.js +++ b/dist/services/recorder.service.js @@ -30,57 +30,72 @@ const path = __importStar(require("path")); const fs = __importStar(require("fs")); class RecorderService { constructor() { - this.events = []; + this.eventGroups = []; + this.currentEvents = []; this.recording = false; this.currentScreenshot = ''; - this.lastTranscription = ''; - this.currentAudioFile = ''; + this.audioBuffer = []; + this.isListeningToMicrophone = false; this.silenceTimer = null; this.isProcessingAudio = false; - this.handleAudioLevel = async (_, level) => { - console.log('RecorderService.handleAudioLevel()', { level }); - if (!this.recording) + this.SILENCE_THRESHOLD = 0.01; + this.SILENCE_DURATION = 1500; // 1.5 seconds of silence to trigger processing + this.MIN_AUDIO_DURATION = 500; // Minimum audio duration to process + this.handleAudioLevel = (_, level) => { + if (!this.recording || !this.isListeningToMicrophone) return; - const SILENCE_THRESHOLD = 0.01; - const SILENCE_DURATION = 1000; - if (level < SILENCE_THRESHOLD) { - if (!this.silenceTimer && !this.isProcessingAudio) { - console.log('RecorderService.handleAudioLevel() - Setting silence timer'); + if (level < this.SILENCE_THRESHOLD) { + if (!this.silenceTimer && !this.isProcessingAudio && this.audioBuffer.length > 0) { this.silenceTimer = setTimeout(async () => { if (this.recording) { - await this.processSilence(); + await this.processCapturedAudio(); } - }, SILENCE_DURATION); + }, this.SILENCE_DURATION); } } else { if (this.silenceTimer) { - console.log('RecorderService.handleAudioLevel() - Clearing silence timer'); clearTimeout(this.silenceTimer); this.silenceTimer = null; } } }; - this.handleAudioChunk = async (_, chunk) => { - console.log('RecorderService.handleAudioChunk()', { chunkSize: chunk.length }); + this.handleAudioChunk = (_, chunk) => { + if (!this.recording || !this.isListeningToMicrophone) + return; + this.audioBuffer.push(chunk); + }; + this.handleKeyboardEvent = async (_, event) => { if (!this.recording) return; - try { - const audioFilePath = path.join(this.tempDir, `audio-${Date.now()}.wav`); - fs.writeFileSync(audioFilePath, chunk); - if (this.silenceTimer) { - clearTimeout(this.silenceTimer); - this.silenceTimer = null; - await this.processAudioFile(audioFilePath); - } - } - catch (error) { - console.error('RecorderService.handleAudioChunk() error:', error); + this.currentEvents.push({ + type: 'type', + identifier: event.key, + value: event.key, + timestamp: Date.now(), + narration: '' + }); + }; + this.handleMouseEvent = async (_, event) => { + if (!this.recording) + return; + const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot); + const element = this.findElementAtPosition(analysis, event.clientX, event.clientY); + if (element) { + this.currentEvents.push({ + type: 'click', + identifier: element.identifier, + timestamp: Date.now(), + narration: '' + }); } }; console.log('RecorderService.constructor()'); this.openAIService = new openai_service_1.OpenAIService(); this.tempDir = path.join(process.cwd(), 'temp_recordings'); + this.ensureTempDirectory(); + } + ensureTempDirectory() { if (!fs.existsSync(this.tempDir)) { fs.mkdirSync(this.tempDir, { recursive: true }); } @@ -89,10 +104,11 @@ class RecorderService { console.log('RecorderService.startRecording()'); try { this.recording = true; - this.events = []; - await this.setupAudioRecording(); - await this.requestScreenshot(); - electron_1.ipcRenderer.on('keyboard-event', this.keyboardHandleEvent); + this.eventGroups = []; + this.currentEvents = []; + await this.startMicrophoneCapture(); + await this.captureInitialScreenshot(); + this.setupEventListeners(); } catch (error) { console.error('RecorderService.startRecording() error:', error); @@ -100,171 +116,118 @@ class RecorderService { throw error; } } - async setupAudioRecording() { - console.log('RecorderService.setupAudioRecording()'); + async startMicrophoneCapture() { + console.log('RecorderService.startMicrophoneCapture()'); try { + this.isListeningToMicrophone = true; electron_1.ipcRenderer.on('audio-level', this.handleAudioLevel); electron_1.ipcRenderer.on('audio-chunk', this.handleAudioChunk); + await electron_1.ipcRenderer.invoke('start-microphone-capture'); } catch (error) { - console.error('RecorderService.setupAudioRecording() error:', error); - throw new Error(`Failed to setup audio recording: ${error.message}`); + console.error('Failed to start microphone capture:', error); + throw new Error(`Microphone initialization failed: ${error.message}`); } } - async processSilence() { - console.log('RecorderService.processSilence()'); - if (this.isProcessingAudio) + async processCapturedAudio() { + if (this.isProcessingAudio || this.audioBuffer.length === 0) return; this.isProcessingAudio = true; + const combinedBuffer = Buffer.concat(this.audioBuffer); + this.audioBuffer = []; // Clear the buffer try { - const audioFilePath = await electron_1.ipcRenderer.invoke('save-audio-chunk'); - console.log('RecorderService.processSilence() - Audio saved to:', audioFilePath); - if (audioFilePath) { - this.currentAudioFile = audioFilePath; - await this.processAudioFile(audioFilePath); - await this.requestScreenshot(); + const audioFilePath = path.join(this.tempDir, `audio-${Date.now()}.wav`); + fs.writeFileSync(audioFilePath, combinedBuffer); + const transcription = await this.openAIService.transcribeAudio(new Blob([combinedBuffer], { type: 'audio/wav' })); + if (transcription.text.trim()) { + await this.processNarrationWithEvents(transcription.text); } + fs.unlinkSync(audioFilePath); } catch (error) { - console.error('RecorderService.processSilence() error:', error); + console.error('Audio processing error:', error); } finally { this.isProcessingAudio = false; } } - async processAudioFile(audioFilePath) { - console.log('RecorderService.processAudioFile()', { audioFilePath }); - try { - const audioBuffer = fs.readFileSync(audioFilePath); - const transcription = await this.openAIService.transcribeAudio(new Blob([audioBuffer], { type: 'audio/wav' })); - console.log('RecorderService.processAudioFile() - Transcription:', transcription); - if (transcription.text.trim()) { - await this.processTranscription(transcription); - } - fs.unlinkSync(audioFilePath); - } - catch (error) { - console.error('RecorderService.processAudioFile() error:', error); - } - } - async processTranscription(transcription) { - console.log('RecorderService.processTranscription()', { transcription }); - this.lastTranscription = transcription.text; - const cursorPosition = await electron_1.ipcRenderer.invoke('get-cursor-position'); - console.log('RecorderService.processTranscription() - Cursor position:', cursorPosition); - const analysis = await this.openAIService.analyzeScreenWithContext({ + async processNarrationWithEvents(narration) { + if (this.currentEvents.length === 0) + return; + const eventGroup = { + narration, + events: [...this.currentEvents], screenshot: this.currentScreenshot, - transcription: this.lastTranscription, - cursorPosition + timestamp: Date.now() + }; + this.eventGroups.push(eventGroup); + this.currentEvents = []; // Clear current events for next group + await this.captureInitialScreenshot(); // Get fresh screenshot for next group + } + setupEventListeners() { + electron_1.ipcRenderer.on('keyboard-event', this.handleKeyboardEvent); + electron_1.ipcRenderer.on('mouse-event', this.handleMouseEvent); + } + async captureInitialScreenshot() { + const sources = await electron_1.ipcRenderer.invoke('get-screenshot'); + this.currentScreenshot = sources[0].thumbnail; + } + findElementAtPosition(analysis, x, y) { + return analysis.elements.find(element => { + const bounds = element.bounds; + return x >= bounds.x && + x <= bounds.x + bounds.width && + y >= bounds.y && + y <= bounds.y + bounds.height; }); - console.log('RecorderService.processTranscription() - Screen analysis:', analysis); - if (analysis) { - this.events.push({ - type: analysis.type, - identifier: analysis.identifier, - value: analysis.value, - timestamp: Date.now(), - narration: this.lastTranscription - }); - } } async stopRecording() { console.log('RecorderService.stopRecording()'); + // Process any remaining audio + if (this.audioBuffer.length > 0) { + await this.processCapturedAudio(); + } + this.cleanup(); + return this.generateBasicCode(); + } + cleanup() { this.recording = false; + this.isListeningToMicrophone = false; if (this.silenceTimer) { clearTimeout(this.silenceTimer); this.silenceTimer = null; } electron_1.ipcRenderer.removeListener('audio-level', this.handleAudioLevel); electron_1.ipcRenderer.removeListener('audio-chunk', this.handleAudioChunk); - electron_1.ipcRenderer.removeListener('keyboard-event', this.keyboardHandleEvent); - if (this.currentAudioFile && fs.existsSync(this.currentAudioFile)) { - fs.unlinkSync(this.currentAudioFile); - } - const code = this.generateBasicCode(); - console.log('RecorderService.stopRecording() - Generated code:', code); - return code; - } - async requestScreenshot() { - console.log('RecorderService.requestScreenshot()'); - try { - const sources = await electron_1.ipcRenderer.invoke('get-screenshot'); - console.log('RecorderService.requestScreenshot() - Sources:', sources); - const screenSource = sources[0]; - await this.screenshotHandleEvent(null, screenSource.thumbnail); - } - catch (error) { - console.error('RecorderService.requestScreenshot() error:', error); - } - } - async screenshotHandleEvent(_, screenshot) { - console.log('RecorderService.screenshotHandleEvent()', { screenshot }); - this.currentScreenshot = screenshot; - } - async keyboardHandleEvent(_, event) { - console.log('RecorderService.keyboardHandleEvent()', { key: event.key }); - if (!this.recording) - return; - this.events.push({ - type: 'type', - identifier: event.key, - timestamp: Date.now(), - narration: this.lastTranscription - }); - } - async mouseHandleEvent(_, event) { - console.log('RecorderService.mouseHandleEvent()', { x: event.x, y: event.y }); - if (!this.recording) - return; - const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot); - console.log('RecorderService.mouseHandleEvent() - Screen analysis:', analysis); - const element = this.findElementAtPosition(analysis, event.x, event.y); - console.log('RecorderService.mouseHandleEvent() - Found element:', element); - if (element) { - this.events.push({ - type: 'click', - identifier: element.identifier, - timestamp: Date.now(), - narration: this.lastTranscription - }); - } - } - findElementAtPosition(analysis, x, y) { - console.log('RecorderService.findElementAtPosition()', { x, y, analysisElementsCount: analysis.elements.length }); - return analysis.elements.find((element) => { - const bounds = element.bounds; - const found = x >= bounds.x && - x <= bounds.x + bounds.width && - y >= bounds.y && - y <= bounds.y + bounds.height; - if (found) { - console.log('RecorderService.findElementAtPosition() - Found matching element:', element); - } - return found; + electron_1.ipcRenderer.removeListener('keyboard-event', this.handleKeyboardEvent); + electron_1.ipcRenderer.removeListener('mouse-event', this.handleMouseEvent); + // Cleanup temp directory + fs.readdirSync(this.tempDir).forEach(file => { + fs.unlinkSync(path.join(this.tempDir, file)); }); } generateBasicCode() { - console.log('RecorderService.generateBasicCode()', { eventsCount: this.events.length }); let basicCode = '10 REM BotDesktop Automation Script\n'; let lineNumber = 20; - for (const event of this.events) { - basicCode += `${lineNumber} REM ${event.narration}\n`; + this.eventGroups.forEach(group => { + basicCode += `${lineNumber} REM ${group.narration}\n`; lineNumber += 10; - switch (event.type) { - case 'click': - basicCode += `${lineNumber} CLICK "${event.identifier}"\n`; - break; - case 'type': - basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`; - break; - case 'move': - basicCode += `${lineNumber} MOVE "${event.identifier}"\n`; - break; - } - lineNumber += 10; - } + group.events.forEach(event => { + switch (event.type) { + case 'click': + basicCode += `${lineNumber} CLICK "${event.identifier}"\n`; + break; + case 'type': + basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`; + break; + case 'move': + basicCode += `${lineNumber} MOVE "${event.identifier}"\n`; + break; + } + lineNumber += 10; + }); + }); basicCode += `${lineNumber} END\n`; - console.log('RecorderService.generateBasicCode() - Generated code:', basicCode); return basicCode; } } diff --git a/src/components/App.tsx b/src/components/App.tsx index bdf15ed..c6b9bfe 100644 --- a/src/components/App.tsx +++ b/src/components/App.tsx @@ -36,10 +36,10 @@ const App: React.FC = () => { }; return ( -
-

BotDesktop Automation

+
+

General Bots Desktop

-
+
-
+

Generated BASIC Code:

-
{basicCode}
+
{basicCode}
+ + +
+ + General Bots +
+
+ ); }; diff --git a/src/main/main.ts b/src/main/main.ts index 6ac3348..48b0818 100644 --- a/src/main/main.ts +++ b/src/main/main.ts @@ -1,35 +1,139 @@ require('dotenv').config(); require('electron-require'); - -import { app, BrowserWindow, desktopCapturer, ipcMain } from 'electron'; +import { app } from 'electron'; +import { BrowserWindow, desktopCapturer, ipcMain } from 'electron'; import * as path from 'path'; import { systemPreferences } from 'electron'; import { RecorderService } from '../services/recorder.service'; import { PlayerService } from '../services/player.service'; +interface AudioCapture { + mediaRecorder: MediaRecorder | null; + audioStream: MediaStream | null; + analyserNode: AnalyserNode | null; + audioData: Uint8Array | null; + isCapturing: boolean; +} + +const audioCapture: AudioCapture = { + mediaRecorder: null, + audioStream: null, + analyserNode: null, + audioData: null, + isCapturing: false +}; + const recorder = new RecorderService(); const player = new PlayerService(); function createWindow() { const mainWindow = new BrowserWindow({ - width: 1200, - height: 800, - + width: 700, + height: 500, + backgroundColor: "grey", + center: true, + maximizable: false, + thickFrame: true, + autoHideMenuBar:true, webPreferences: { + experimentalFeatures: true, nodeIntegrationInWorker: true, nodeIntegration: true, nodeIntegrationInSubFrames: true, contextIsolation: false, preload: path.join(__dirname, '../preload/preload.js') } + }); + mainWindow.setAutoHideMenuBar(true); + mainWindow. setMaximizable(false); + if (process.env.NODE_ENV === 'development') { mainWindow.loadURL('http://localhost:8080'); mainWindow.webContents.openDevTools(); } else { mainWindow.loadFile(path.join(__dirname, '../../src/renderer/index.html')); - } + } ipcMain.handle('mouse-event', recorder.handleMouseEvent.bind(recorder)); + ipcMain.handle('keyboard-event', recorder.handleKeyboardEvent.bind(recorder)); + + + // Handler to capture the entire screen + ipcMain.handle('get-screenshot', async () => { + console.log('get-screenshot called'); + const sources = await desktopCapturer.getSources({ types: ['screen'] }); + const screenSource = sources[0]; // Get the first screen source + + const { thumbnail } = screenSource; // Thumbnail is a native image + return thumbnail.toPNG(); // Return the screenshot as PNG buffer + }); + + ipcMain.handle('start-recording', async () => { + console.log('start-recording called'); + await recorder.startRecording(); + }); + + ipcMain.handle('stop-recording', async () => { + console.log('stop-recording called'); + return await recorder.stopRecording(); + }); + + ipcMain.handle('execute-basic-code', async (_, code: string) => { + console.log('execute-basic-code called with:', code); + await player.executeBasicCode(code); + }); + + ipcMain.handle('check-microphone-permission', async () => { + console.log('check-microphone-permission called'); + if (process.platform === 'darwin') { + const status = await systemPreferences.getMediaAccessStatus('microphone'); + if (status !== 'granted') { + const success = await systemPreferences.askForMediaAccess('microphone'); + return success; + } + return true; + } + return true; // On Windows/Linux, permissions are handled by the OS + }); + + + ipcMain.handle('start-microphone-capture', async (event) => { + debugger; + const window = BrowserWindow.fromWebContents(event.sender); + if (!window) { + throw new Error('No window found for this request'); + } + return startMicrophoneCapture(window); + }); + + ipcMain.handle('stop-microphone-capture', async (event) => { + const window = BrowserWindow.fromWebContents(event.sender); + if (!window) { + throw new Error('No window found for this request'); + } + return stopMicrophoneCapture(window); + }); + + ipcMain.handle('start-microphone-capture', async (event, ...args) => { + // Perform asynchronous microphone capture logic here + try { + const result = await startMicrophoneCapture(args[0]); // Assuming this function is async + return result; + } catch (error) { + console.error("Error during microphone capture:", error); + throw error; // Send the error back to the renderer + } + }); + ipcMain.handle('stop-microphone-capture', async (event, ...args) => { + try { + const result = await stopMicrophoneCapture(args[0]); + return result; + } catch (error) { + console.error("Error stopping microphone capture:", error); + throw error; // Send the error back to the renderer + } + }); + } app.whenReady().then(createWindow); @@ -46,47 +150,153 @@ app.on('activate', () => { } }); -ipcMain.handle('mouse-event', recorder.mouseHandleEvent.bind(recorder)); -ipcMain.handle('keyboard-event', recorder.keyboardHandleEvent.bind(recorder)); -ipcMain.handle('screenshot-captured', recorder.screenshotHandleEvent.bind(recorder)); - -// Handler to capture the entire screen -ipcMain.handle('get-screenshot', async () => { - console.log('get-screenshot called'); - const sources = await desktopCapturer.getSources({ types: ['screen'] }); - const screenSource = sources[0]; // Get the first screen source - - const { thumbnail } = screenSource; // Thumbnail is a native image - return thumbnail.toPNG(); // Return the screenshot as PNG buffer -}); - -ipcMain.handle('start-recording', async () => { - console.log('start-recording called'); - await recorder.startRecording(); -}); - -ipcMain.handle('stop-recording', async () => { - console.log('stop-recording called'); - return await recorder.stopRecording(); -}); - -ipcMain.handle('execute-basic-code', async (_, code: string) => { - console.log('execute-basic-code called with:', code); - await player.executeBasicCode(code); -}); - -ipcMain.handle('check-microphone-permission', async () => { - console.log('check-microphone-permission called'); - if (process.platform === 'darwin') { - const status = await systemPreferences.getMediaAccessStatus('microphone'); - if (status !== 'granted') { - const success = await systemPreferences.askForMediaAccess('microphone'); - return success; - } - return true; - } - return true; // On Windows/Linux, permissions are handled by the OS -}); - // Enable required permissions app.commandLine.appendSwitch('enable-speech-dispatcher'); + + +// Register cleanup on app quit +app.on('will-quit', cleanupAudioCapture); + + + +// Function to get the focused window or first available window +function getFocusedWindow(): BrowserWindow | null { + const focusedWindow = BrowserWindow.getFocusedWindow(); + if (focusedWindow) return focusedWindow; + + const windows = BrowserWindow.getAllWindows(); + return windows.length > 0 ? windows[0] : null; +} + +// Function to safely send to window +function sendToWindow(channel: string, ...args: any[]) { + const window = getFocusedWindow(); + if (window && !window.isDestroyed()) { + window.webContents.send(channel, ...args); + } +} +async function startMicrophoneCapture(window: BrowserWindow): Promise { + console.log('Starting microphone capture...'); + + try { + + // Request microphone access + //@ts-ignore + const stream = await window.myApi.startMicrophone() + + audioCapture.audioStream = stream; + + // Set up audio analysis + const audioContext = new ((window as any).AudioContext || (window as any).webkitAudioContext)(); + const sourceNode = audioContext.createMediaStreamSource(stream); + audioCapture.analyserNode = audioContext.createAnalyser(); + audioCapture.analyserNode.fftSize = 2048; + + sourceNode.connect(audioCapture.analyserNode); + audioCapture.audioData = new Uint8Array(audioCapture.analyserNode.frequencyBinCount); + + // Set up MediaRecorder + audioCapture.mediaRecorder = new MediaRecorder(stream, { + mimeType: 'audio/webm;codecs=opus' + }); + + // Handle audio data + audioCapture.mediaRecorder.ondataavailable = (event: BlobEvent) => { + if (event.data.size > 0 && !window.isDestroyed()) { + // Convert blob to buffer and send to renderer + const reader = new FileReader(); + reader.onloadend = () => { + const buffer = Buffer.from(reader.result as ArrayBuffer); + window.webContents.send('audio-chunk', buffer); + }; + reader.readAsArrayBuffer(event.data); + } + }; + + // Start recording + audioCapture.mediaRecorder.start(1000); // Capture in 1-second chunks + audioCapture.isCapturing = true; + + // Start audio level monitoring + monitorAudioLevels(window); + + console.log('Microphone capture started successfully'); + } catch (error) { + console.error('Failed to start microphone capture:', error); + throw error; + } +} + +function monitorAudioLevels(window: BrowserWindow) { + if (!audioCapture.isCapturing || !audioCapture.analyserNode || !audioCapture.audioData || window.isDestroyed()) { + return; + } + + // Get audio level data + audioCapture.analyserNode.getByteFrequencyData(audioCapture.audioData); + + // Calculate average volume level (0-1) + const average = audioCapture.audioData.reduce((acc, value) => acc + value, 0) / + audioCapture.audioData.length / + 255; + + // Send level to renderer + if (!window.isDestroyed()) { + window.webContents.send('audio-level', average); + } + + // Continue monitoring + requestAnimationFrame(() => monitorAudioLevels(window)); +} + +function stopMicrophoneCapture(window: BrowserWindow) { + console.log('Stopping microphone capture...'); + + try { + if (audioCapture.mediaRecorder && audioCapture.mediaRecorder.state !== 'inactive') { + audioCapture.mediaRecorder.stop(); + } + + if (audioCapture.audioStream) { + audioCapture.audioStream.getTracks().forEach(track => track.stop()); + } + + if (audioCapture.analyserNode) { + audioCapture.analyserNode.disconnect(); + } + + audioCapture.isCapturing = false; + audioCapture.mediaRecorder = null; + audioCapture.audioStream = null; + audioCapture.analyserNode = null; + audioCapture.audioData = null; + + if (!window.isDestroyed()) { + window.webContents.send('microphone-stopped'); + } + + console.log('Microphone capture stopped successfully'); + } catch (error) { + console.error('Failed to stop microphone capture:', error); + throw error; + } +} + +// Error handler for audio processing +function handleAudioError(error: Error, window: BrowserWindow): void { + console.error('Audio processing error:', error); + stopMicrophoneCapture(window); + + // Notify renderer of error if window still exists + if (!window.isDestroyed()) { + window.webContents.send('audio-error', error.message); + } +} + +// Clean up resources when app is closing +export function cleanupAudioCapture(): void { + const window = getFocusedWindow(); + if (window) { + stopMicrophoneCapture(window); + } +} diff --git a/src/preload/preload.ts b/src/preload/preload.ts index ffed7db..9613b28 100644 --- a/src/preload/preload.ts +++ b/src/preload/preload.ts @@ -1,7 +1,20 @@ const { ipcRenderer } = require('electron'); +const { contextBridge } = require('electron'); + +const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)(); + //@ts-nocheck (window as any).myApi = { + + startMicrophone: ()=>{ + alert(1); + if (navigator.mediaDevices) { + return navigator.mediaDevices.getUserMedia({ audio: true }); + } else { + console.error("MediaDevices API not supported"); + } + }, sendMessage: (message: any) => { console.log('[preload] sendMessage called with:', message); return ipcRenderer.send('message-from-renderer', message); @@ -11,3 +24,4 @@ const { ipcRenderer } = require('electron'); return ipcRenderer.on('message-from-main', (event, arg) => callback(arg)); }, }; + diff --git a/src/renderer/index.html b/src/renderer/index.html index ef787dc..5065acd 100644 --- a/src/renderer/index.html +++ b/src/renderer/index.html @@ -2,7 +2,7 @@ - BotDesktop + General Bots Desktop diff --git a/src/services/openai.service.ts b/src/services/openai.service.ts index 53dedb1..e19ee5e 100644 --- a/src/services/openai.service.ts +++ b/src/services/openai.service.ts @@ -56,7 +56,7 @@ export class OpenAIService { role: 'system', content: `You are an AI that analyzes screenshots and voice commands to determine user intentions for automation. You should identify UI elements and return specific actions in JSON format. - Focus on the area near the cursor position when relevant.` + Focus on the area near the field ${context.identifier}.` }, { role: 'user', diff --git a/src/services/player.service.ts b/src/services/player.service.ts index 98074d5..09ace25 100644 --- a/src/services/player.service.ts +++ b/src/services/player.service.ts @@ -1,9 +1,20 @@ -import { ipcMain } from 'electron'; -import { AutomationEvent, ScreenAnalysis } from './types'; -import { OpenAIService } from './openai.service'; +import { ipcRenderer, ipcMain } from 'electron'; +import { AutomationEvent, ScreenAnalysis, WhisperResponse } from '../services/types'; +import { OpenAIService } from '../services/openai.service'; +import * as path from 'path'; +import * as fs from 'fs'; + +interface EventGroup { + narration: string; + events: AutomationEvent[]; + screenshot: string; + timestamp: number; +} export class PlayerService { private openAIService: OpenAIService; + private currentScreenshot: string = ''; + private isPlaying: boolean = false; constructor() { console.log('[PlayerService] Initializing'); @@ -12,31 +23,60 @@ export class PlayerService { async executeBasicCode(code: string) { console.log('[PlayerService] executeBasicCode called with:', code); + this.isPlaying = true; const lines = code.split('\n'); - for (const line of lines) { - if (line.trim().startsWith('REM') || line.trim() === '') continue; - - const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/); - if (!match) continue; + try { + for (const line of lines) { + if (!this.isPlaying) break; + if (line.trim().startsWith('REM') || line.trim() === '') continue; + + const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/); + if (!match) continue; - const [_, command, identifier, value] = match; - console.log('[PlayerService] Executing command:', { command, identifier, value }); - await this.executeCommand(command, identifier, value); - await new Promise(resolve => setTimeout(resolve, 500)); + const [_, command, identifier, value] = match; + console.log('[PlayerService] Executing command:', { command, identifier, value }); + + await this.captureAndAnalyzeScreen(); + await this.executeCommand(command, identifier, value); + await new Promise(resolve => setTimeout(resolve, 500)); + } + } catch (error) { + console.error('[PlayerService] Execution error:', error); + this.isPlaying = false; + throw error; } } + private async captureAndAnalyzeScreen() { + console.log('[PlayerService] captureAndAnalyzeScreen called'); + const sources = await ipcRenderer.invoke('get-screenshot'); + this.currentScreenshot = sources[0].thumbnail; + } + private async executeCommand(command: string, identifier: string, value?: string) { console.log('[PlayerService] executeCommand called with:', { command, identifier, value }); - const screenshotPath = await this.captureScreen(); - console.log('[PlayerService] Screen captured at:', screenshotPath); + const element = await this.openAIService.analyzeScreenWithContext({ + screenshot: this.currentScreenshot, + transcription: '', + identifier,cursorPosition: null + }); + + //@ts-nocheck - const analysis = await this.openAIService.analyzeScreen(screenshotPath); - const element = analysis.elements.find(e => e.identifier === identifier); - - if (!element) throw new Error(`Element not found: ${identifier}`); + if (!element) { + console.warn(`[PlayerService] Element not found: ${identifier}, retrying with fresh analysis`); + await this.captureAndAnalyzeScreen(); + const newElement = await this.openAIService.analyzeScreenWithContext({ + screenshot: this.currentScreenshot, + transcription: '', + cursorPosition: await ipcRenderer.invoke('get-cursor-position'), + identifier + }); + + if (!newElement) throw new Error(`Element not found after retry: ${identifier}`); + } const centerX = element.bounds.x + element.bounds.width/2; const centerY = element.bounds.y + element.bounds.height/2; @@ -44,49 +84,22 @@ export class PlayerService { switch (command) { case 'CLICK': console.log('[PlayerService] Simulating click at:', { centerX, centerY }); - await this.simulateClick(centerX, centerY); + await ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY }); break; case 'TYPE': console.log('[PlayerService] Simulating type:', { centerX, centerY, value }); - await this.simulateClick(centerX, centerY); - await this.simulateTyping(value || ''); + await ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY }); + await ipcRenderer.invoke('simulate-type', { text: value || '' }); + break; + case 'MOVE': + console.log('[PlayerService] Simulating move:', { centerX, centerY }); + await ipcRenderer.invoke('simulate-move', { x: centerX, y: centerY }); break; } } - private async captureScreen(): Promise { - console.log('[PlayerService] captureScreen called'); - return new Promise((resolve, reject) => { - ipcMain.once('screen-captured', (_, screenshotPath) => { - console.log('[PlayerService] Screen captured event received:', screenshotPath); - resolve(screenshotPath); - }); - - ipcMain.emit('capture-screen'); - }); + public stop() { + console.log('[PlayerService] Stopping playback'); + this.isPlaying = false; } - - private async simulateClick(x: number, y: number): Promise { - console.log('[PlayerService] simulateClick called with:', { x, y }); - return new Promise((resolve) => { - ipcMain.once('click-completed', () => { - console.log('[PlayerService] Click completed'); - resolve(); - }); - - ipcMain.emit('simulate-click', { x, y }); - }); - } - - private async simulateTyping(text: string): Promise { - console.log('[PlayerService] simulateTyping called with:', text); - return new Promise((resolve) => { - ipcMain.once('typing-completed', () => { - console.log('[PlayerService] Typing completed'); - resolve(); - }); - - ipcMain.emit('simulate-typing', { text }); - }); - } -} +} \ No newline at end of file diff --git a/src/services/recorder.service.ts b/src/services/recorder.service.ts index 46e45e2..7655012 100644 --- a/src/services/recorder.service.ts +++ b/src/services/recorder.service.ts @@ -1,24 +1,32 @@ import { ipcRenderer } from 'electron'; -import { AutomationEvent, ScreenAnalysis, WhisperResponse } from '../services/types'; +import { AutomationEvent, EventGroup, ScreenAnalysis, WhisperResponse } from '../services/types'; import { OpenAIService } from '../services/openai.service'; import * as path from 'path'; import * as fs from 'fs'; export class RecorderService { - private events: AutomationEvent[] = []; + private eventGroups: EventGroup[] = []; + private currentEvents: AutomationEvent[] = []; private recording: boolean = false; private openAIService: OpenAIService; private currentScreenshot: string = ''; - private lastTranscription: string = ''; - private currentAudioFile: string = ''; + private audioBuffer: Buffer[] = []; + private isListeningToMicrophone: boolean = false; private silenceTimer: NodeJS.Timeout | null = null; private isProcessingAudio: boolean = false; private tempDir: string; + private SILENCE_THRESHOLD = 0.01; + private SILENCE_DURATION = 1500; // 1.5 seconds of silence to trigger processing + private MIN_AUDIO_DURATION = 500; // Minimum audio duration to process constructor() { console.log('RecorderService.constructor()'); this.openAIService = new OpenAIService(); this.tempDir = path.join(process.cwd(), 'temp_recordings'); + this.ensureTempDirectory(); + } + + private ensureTempDirectory() { if (!fs.existsSync(this.tempDir)) { fs.mkdirSync(this.tempDir, { recursive: true }); } @@ -28,10 +36,11 @@ export class RecorderService { console.log('RecorderService.startRecording()'); try { this.recording = true; - this.events = []; - await this.setupAudioRecording(); - await this.requestScreenshot(); - ipcRenderer.on('keyboard-event', this.keyboardHandleEvent); + this.eventGroups = []; + this.currentEvents = []; + await this.startMicrophoneCapture(); + await this.captureInitialScreenshot(); + this.setupEventListeners(); } catch (error) { console.error('RecorderService.startRecording() error:', error); this.recording = false; @@ -39,127 +48,148 @@ export class RecorderService { } } - private async setupAudioRecording() { - console.log('RecorderService.setupAudioRecording()'); + private async startMicrophoneCapture() { + console.log('RecorderService.startMicrophoneCapture()'); try { + this.isListeningToMicrophone = true; ipcRenderer.on('audio-level', this.handleAudioLevel); ipcRenderer.on('audio-chunk', this.handleAudioChunk); + await ipcRenderer.invoke('start-microphone-capture'); } catch (error) { - console.error('RecorderService.setupAudioRecording() error:', error); - throw new Error(`Failed to setup audio recording: ${error.message}`); + console.error('Failed to start microphone capture:', error); + throw new Error(`Microphone initialization failed: ${error.message}`); } } - private handleAudioLevel = async (_: any, level: number) => { - console.log('RecorderService.handleAudioLevel()', { level }); - if (!this.recording) return; + public handleAudioLevel = (_: any, level: number) => { + if (!this.recording || !this.isListeningToMicrophone) return; - const SILENCE_THRESHOLD = 0.01; - const SILENCE_DURATION = 1000; - - if (level < SILENCE_THRESHOLD) { - if (!this.silenceTimer && !this.isProcessingAudio) { - console.log('RecorderService.handleAudioLevel() - Setting silence timer'); + if (level < this.SILENCE_THRESHOLD) { + if (!this.silenceTimer && !this.isProcessingAudio && this.audioBuffer.length > 0) { this.silenceTimer = setTimeout(async () => { if (this.recording) { - await this.processSilence(); + await this.processCapturedAudio(); } - }, SILENCE_DURATION); + }, this.SILENCE_DURATION); } } else { if (this.silenceTimer) { - console.log('RecorderService.handleAudioLevel() - Clearing silence timer'); clearTimeout(this.silenceTimer); this.silenceTimer = null; } } } - private handleAudioChunk = async (_: any, chunk: Buffer) => { - console.log('RecorderService.handleAudioChunk()', { chunkSize: chunk.length }); - if (!this.recording) return; + public handleAudioChunk = (_: any, chunk: Buffer) => { + if (!this.recording || !this.isListeningToMicrophone) return; + this.audioBuffer.push(chunk); + } + + private async processCapturedAudio() { + if (this.isProcessingAudio || this.audioBuffer.length === 0) return; + + this.isProcessingAudio = true; + const combinedBuffer = Buffer.concat(this.audioBuffer); + this.audioBuffer = []; // Clear the buffer try { const audioFilePath = path.join(this.tempDir, `audio-${Date.now()}.wav`); - fs.writeFileSync(audioFilePath, chunk); + fs.writeFileSync(audioFilePath, combinedBuffer); - if (this.silenceTimer) { - clearTimeout(this.silenceTimer); - this.silenceTimer = null; - await this.processAudioFile(audioFilePath); - } - } catch (error) { - console.error('RecorderService.handleAudioChunk() error:', error); - } - }; + const transcription = await this.openAIService.transcribeAudio( + new Blob([combinedBuffer], { type: 'audio/wav' }) + ); - private async processSilence() { - console.log('RecorderService.processSilence()'); - if (this.isProcessingAudio) return; - - this.isProcessingAudio = true; - try { - const audioFilePath = await ipcRenderer.invoke('save-audio-chunk'); - console.log('RecorderService.processSilence() - Audio saved to:', audioFilePath); - if (audioFilePath) { - this.currentAudioFile = audioFilePath; - await this.processAudioFile(audioFilePath); - await this.requestScreenshot(); + if (transcription.text.trim()) { + await this.processNarrationWithEvents(transcription.text); } + + fs.unlinkSync(audioFilePath); } catch (error) { - console.error('RecorderService.processSilence() error:', error); + console.error('Audio processing error:', error); } finally { this.isProcessingAudio = false; } } - private async processAudioFile(audioFilePath: string) { - console.log('RecorderService.processAudioFile()', { audioFilePath }); - try { - const audioBuffer = fs.readFileSync(audioFilePath); - const transcription = await this.openAIService.transcribeAudio( - new Blob([audioBuffer], { type: 'audio/wav' }) - ); - console.log('RecorderService.processAudioFile() - Transcription:', transcription); + private async processNarrationWithEvents(narration: string) { + if (this.currentEvents.length === 0) return; - if (transcription.text.trim()) { - await this.processTranscription(transcription); - } + const eventGroup: EventGroup = { + narration, + events: [...this.currentEvents], + screenshot: this.currentScreenshot, + timestamp: Date.now() + }; - fs.unlinkSync(audioFilePath); - } catch (error) { - console.error('RecorderService.processAudioFile() error:', error); - } + this.eventGroups.push(eventGroup); + this.currentEvents = []; // Clear current events for next group + await this.captureInitialScreenshot(); // Get fresh screenshot for next group } - private async processTranscription(transcription: WhisperResponse) { - console.log('RecorderService.processTranscription()', { transcription }); - this.lastTranscription = transcription.text; - - const cursorPosition = await ipcRenderer.invoke('get-cursor-position'); - console.log('RecorderService.processTranscription() - Cursor position:', cursorPosition); - - const analysis = await this.openAIService.analyzeScreenWithContext({ - screenshot: this.currentScreenshot, - transcription: this.lastTranscription, - cursorPosition - }); - console.log('RecorderService.processTranscription() - Screen analysis:', analysis); + private setupEventListeners() { + ipcRenderer.on('keyboard-event', this.handleKeyboardEvent); + ipcRenderer.on('mouse-event', this.handleMouseEvent); + } - if (analysis) { - this.events.push({ - type: analysis.type, - identifier: analysis.identifier, - value: analysis.value, + private async captureInitialScreenshot() { + const sources = await ipcRenderer.invoke('get-screenshot'); + this.currentScreenshot = sources[0].thumbnail; + } + + public handleKeyboardEvent = async (_: any, event: KeyboardEvent) => { + if (!this.recording) return; + + this.currentEvents.push({ + type: 'type', + identifier: event.key, + value: event.key, + timestamp: Date.now(), + narration: '' + }); + } + + public handleMouseEvent = async (_: any, event: MouseEvent) => { + if (!this.recording) return; + + const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot); + const element = this.findElementAtPosition(analysis, event.clientX, event.clientY); + + if (element) { + this.currentEvents.push({ + type: 'click', + identifier: element.identifier, timestamp: Date.now(), - narration: this.lastTranscription + narration: '' }); } } + private findElementAtPosition(analysis: ScreenAnalysis, x: number, y: number) { + return analysis.elements.find(element => { + const bounds = element.bounds; + return x >= bounds.x && + x <= bounds.x + bounds.width && + y >= bounds.y && + y <= bounds.y + bounds.height; + }); + } + public async stopRecording(): Promise { console.log('RecorderService.stopRecording()'); + + // Process any remaining audio + if (this.audioBuffer.length > 0) { + await this.processCapturedAudio(); + } + + this.cleanup(); + return this.generateBasicCode(); + } + + private cleanup() { this.recording = false; + this.isListeningToMicrophone = false; if (this.silenceTimer) { clearTimeout(this.silenceTimer); @@ -168,106 +198,40 @@ export class RecorderService { ipcRenderer.removeListener('audio-level', this.handleAudioLevel); ipcRenderer.removeListener('audio-chunk', this.handleAudioChunk); - ipcRenderer.removeListener('keyboard-event', this.keyboardHandleEvent); + ipcRenderer.removeListener('keyboard-event', this.handleKeyboardEvent); + ipcRenderer.removeListener('mouse-event', this.handleMouseEvent); - if (this.currentAudioFile && fs.existsSync(this.currentAudioFile)) { - fs.unlinkSync(this.currentAudioFile); - } - - const code = this.generateBasicCode(); - console.log('RecorderService.stopRecording() - Generated code:', code); - return code; - } - - private async requestScreenshot() { - console.log('RecorderService.requestScreenshot()'); - try { - const sources = await ipcRenderer.invoke('get-screenshot'); - console.log('RecorderService.requestScreenshot() - Sources:', sources); - const screenSource = sources[0]; - await this.screenshotHandleEvent(null, screenSource.thumbnail); - } catch (error) { - console.error('RecorderService.requestScreenshot() error:', error); - } - } - - public async screenshotHandleEvent(_: any, screenshot: string) { - console.log('RecorderService.screenshotHandleEvent()', { screenshot }); - this.currentScreenshot = screenshot; - } - - public async keyboardHandleEvent(_: any, event: KeyboardEvent) { - console.log('RecorderService.keyboardHandleEvent()', { key: event.key }); - if (!this.recording) return; - - this.events.push({ - type: 'type', - identifier: event.key, - timestamp: Date.now(), - narration: this.lastTranscription - }); - } - - public async mouseHandleEvent(_: any, event: any) { - console.log('RecorderService.mouseHandleEvent()', { x: event.x, y: event.y }); - if (!this.recording) return; - - const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot); - console.log('RecorderService.mouseHandleEvent() - Screen analysis:', analysis); - - const element = this.findElementAtPosition(analysis, event.x, event.y); - console.log('RecorderService.mouseHandleEvent() - Found element:', element); - - if (element) { - this.events.push({ - type: 'click', - identifier: element.identifier, - timestamp: Date.now(), - narration: this.lastTranscription - }); - } - } - - private findElementAtPosition(analysis: ScreenAnalysis, x: number, y: number) { - console.log('RecorderService.findElementAtPosition()', { x, y, analysisElementsCount: analysis.elements.length }); - return analysis.elements.find((element) => { - const bounds = element.bounds; - const found = x >= bounds.x && - x <= bounds.x + bounds.width && - y >= bounds.y && - y <= bounds.y + bounds.height; - if (found) { - console.log('RecorderService.findElementAtPosition() - Found matching element:', element); - } - return found; + // Cleanup temp directory + fs.readdirSync(this.tempDir).forEach(file => { + fs.unlinkSync(path.join(this.tempDir, file)); }); } private generateBasicCode(): string { - console.log('RecorderService.generateBasicCode()', { eventsCount: this.events.length }); let basicCode = '10 REM BotDesktop Automation Script\n'; let lineNumber = 20; - for (const event of this.events) { - basicCode += `${lineNumber} REM ${event.narration}\n`; + this.eventGroups.forEach(group => { + basicCode += `${lineNumber} REM ${group.narration}\n`; lineNumber += 10; - switch (event.type) { - case 'click': - basicCode += `${lineNumber} CLICK "${event.identifier}"\n`; - break; - case 'type': - basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`; - break; - case 'move': - basicCode += `${lineNumber} MOVE "${event.identifier}"\n`; - break; - } - lineNumber += 10; - } + group.events.forEach(event => { + switch (event.type) { + case 'click': + basicCode += `${lineNumber} CLICK "${event.identifier}"\n`; + break; + case 'type': + basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`; + break; + case 'move': + basicCode += `${lineNumber} MOVE "${event.identifier}"\n`; + break; + } + lineNumber += 10; + }); + }); basicCode += `${lineNumber} END\n`; - console.log('RecorderService.generateBasicCode() - Generated code:', basicCode); return basicCode; } } \ No newline at end of file diff --git a/src/services/types.ts b/src/services/types.ts index 97eca85..951649e 100644 --- a/src/services/types.ts +++ b/src/services/types.ts @@ -1,3 +1,22 @@ + +export interface PlaybackEvent { + command: string; + args: string[]; + description?: string; +} + +export interface ScreenElement { + identifier: string; + bounds: { + x: number; + y: number; + width: number; + height: number; + }; + windowName: string; + value?: string; +} + export interface AutomationAction { type: 'click' | 'type' | 'move'; identifier: string; @@ -12,6 +31,12 @@ export interface AutomationAction { } +export interface EventGroup { + narration: string; + events: AutomationEvent[]; + screenshot: string; + timestamp: number; +} export interface AutomationEvent { type: 'click' | 'type' | 'move'; @@ -30,6 +55,7 @@ export interface ScreenContext { screenshot: string; transcription: string; cursorPosition: { x: number, y: number }; + identifier: string; } export interface ScreenAnalysis {