diff --git a/dist/components/App.js b/dist/components/App.js
index 5128888..4467674 100644
--- a/dist/components/App.js
+++ b/dist/components/App.js
@@ -55,13 +55,15 @@ const App = () => {
console.error('Playback error:', error);
}
};
- return (react_1.default.createElement("div", { className: "p-4" },
- react_1.default.createElement("h1", { className: "text-2xl font-bold mb-4" }, "BotDesktop Automation"),
- react_1.default.createElement("div", { className: "space-x-4 mb-4" },
+ return (react_1.default.createElement("div", { className: "p-4 h-auto" },
+ react_1.default.createElement("h1", { className: "text-2xl font-bold mb-4" }, "General Bots Desktop"),
+ react_1.default.createElement("div", { className: "space-x-4 mb-4 h-auto" },
react_1.default.createElement("button", { className: `px-4 py-2 rounded ${recording ? 'bg-red-500' : 'bg-blue-500'} text-white`, onClick: recording ? handleStopRecording : handleStartRecording }, recording ? 'Stop Recording' : 'Start Recording'),
react_1.default.createElement("button", { className: "px-4 py-2 rounded bg-green-500 text-white", onClick: handlePlayback, disabled: !basicCode }, "Play Recording")),
- react_1.default.createElement("div", { className: "mt-4" },
+ react_1.default.createElement("div", { className: "mt-4 h-20" },
react_1.default.createElement("h2", { className: "text-xl font-bold mb-2" }, "Generated BASIC Code:"),
- react_1.default.createElement("pre", { className: "bg-gray-100 p-2 rounded border" }, basicCode))));
+ react_1.default.createElement("pre", { className: "h-20 min-h-100 bg-gray-100 p-2 rounded border" }, basicCode)),
+ react_1.default.createElement("div", { className: "mb-4" },
+ react_1.default.createElement("a", { href: "https://github.com/General Bots" }, "General Bots"))));
};
exports.default = App;
diff --git a/dist/main/main.js b/dist/main/main.js
index 517e3d2..460f5e7 100644
--- a/dist/main/main.js
+++ b/dist/main/main.js
@@ -23,20 +23,35 @@ var __importStar = (this && this.__importStar) || function (mod) {
return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
+exports.cleanupAudioCapture = cleanupAudioCapture;
require('dotenv').config();
require('electron-require');
const electron_1 = require("electron");
-const path = __importStar(require("path"));
const electron_2 = require("electron");
+const path = __importStar(require("path"));
+const electron_3 = require("electron");
const recorder_service_1 = require("../services/recorder.service");
const player_service_1 = require("../services/player.service");
+const audioCapture = {
+ mediaRecorder: null,
+ audioStream: null,
+ analyserNode: null,
+ audioData: null,
+ isCapturing: false
+};
const recorder = new recorder_service_1.RecorderService();
const player = new player_service_1.PlayerService();
function createWindow() {
- const mainWindow = new electron_1.BrowserWindow({
- width: 1200,
- height: 800,
+ const mainWindow = new electron_2.BrowserWindow({
+ width: 700,
+ height: 500,
+ backgroundColor: "grey",
+ center: true,
+ maximizable: false,
+ thickFrame: true,
+ autoHideMenuBar: true,
webPreferences: {
+ experimentalFeatures: true,
nodeIntegrationInWorker: true,
nodeIntegration: true,
nodeIntegrationInSubFrames: true,
@@ -44,6 +59,8 @@ function createWindow() {
preload: path.join(__dirname, '../preload/preload.js')
}
});
+ mainWindow.setAutoHideMenuBar(true);
+ mainWindow.setMaximizable(false);
if (process.env.NODE_ENV === 'development') {
mainWindow.loadURL('http://localhost:8080');
mainWindow.webContents.openDevTools();
@@ -51,6 +68,76 @@ function createWindow() {
else {
mainWindow.loadFile(path.join(__dirname, '../../src/renderer/index.html'));
}
+ electron_2.ipcMain.handle('mouse-event', recorder.handleMouseEvent.bind(recorder));
+ electron_2.ipcMain.handle('keyboard-event', recorder.handleKeyboardEvent.bind(recorder));
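+    // NOTE: these ipcMain.handle registrations run on every createWindow() call;
+    // Electron throws if a channel is registered twice, so this assumes a single
+    // main window for the app's lifetime.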
+ // Handler to capture the entire screen
+ electron_2.ipcMain.handle('get-screenshot', async () => {
+ console.log('get-screenshot called');
+ const sources = await electron_2.desktopCapturer.getSources({ types: ['screen'] });
+ const screenSource = sources[0]; // Get the first screen source
+ const { thumbnail } = screenSource; // Thumbnail is a native image
+ return thumbnail.toPNG(); // Return the screenshot as PNG buffer
+ });
+ electron_2.ipcMain.handle('start-recording', async () => {
+ console.log('start-recording called');
+ await recorder.startRecording();
+ });
+ electron_2.ipcMain.handle('stop-recording', async () => {
+ console.log('stop-recording called');
+ return await recorder.stopRecording();
+ });
+ electron_2.ipcMain.handle('execute-basic-code', async (_, code) => {
+ console.log('execute-basic-code called with:', code);
+ await player.executeBasicCode(code);
+ });
+ electron_2.ipcMain.handle('check-microphone-permission', async () => {
+ console.log('check-microphone-permission called');
+ if (process.platform === 'darwin') {
+ const status = await electron_3.systemPreferences.getMediaAccessStatus('microphone');
+ if (status !== 'granted') {
+ const success = await electron_3.systemPreferences.askForMediaAccess('microphone');
+ return success;
+ }
+ return true;
+ }
+ return true; // On Windows/Linux, permissions are handled by the OS
+ });
+    electron_2.ipcMain.handle('start-microphone-capture', async (event) => {
+        const window = electron_2.BrowserWindow.fromWebContents(event.sender);
+        if (!window) {
+            throw new Error('No window found for this request');
+        }
+        try {
+            return await startMicrophoneCapture(window);
+        }
+        catch (error) {
+            console.error('Error during microphone capture:', error);
+            throw error; // Send the error back to the renderer
+        }
+    });
+    electron_2.ipcMain.handle('stop-microphone-capture', async (event) => {
+        const window = electron_2.BrowserWindow.fromWebContents(event.sender);
+        if (!window) {
+            throw new Error('No window found for this request');
+        }
+        try {
+            return await stopMicrophoneCapture(window);
+        }
+        catch (error) {
+            console.error('Error stopping microphone capture:', error);
+            throw error; // Send the error back to the renderer
+        }
+    });
}
electron_1.app.whenReady().then(createWindow);
electron_1.app.on('window-all-closed', () => {
@@ -59,44 +146,128 @@ electron_1.app.on('window-all-closed', () => {
}
});
electron_1.app.on('activate', () => {
- if (electron_1.BrowserWindow.getAllWindows().length === 0) {
+ if (electron_2.BrowserWindow.getAllWindows().length === 0) {
createWindow();
}
});
-electron_1.ipcMain.handle('mouse-event', recorder.mouseHandleEvent.bind(recorder));
-electron_1.ipcMain.handle('keyboard-event', recorder.keyboardHandleEvent.bind(recorder));
-electron_1.ipcMain.handle('screenshot-captured', recorder.screenshotHandleEvent.bind(recorder));
-// Handler to capture the entire screen
-electron_1.ipcMain.handle('get-screenshot', async () => {
- console.log('get-screenshot called');
- const sources = await electron_1.desktopCapturer.getSources({ types: ['screen'] });
- const screenSource = sources[0]; // Get the first screen source
- const { thumbnail } = screenSource; // Thumbnail is a native image
- return thumbnail.toPNG(); // Return the screenshot as PNG buffer
-});
-electron_1.ipcMain.handle('start-recording', async () => {
- console.log('start-recording called');
- await recorder.startRecording();
-});
-electron_1.ipcMain.handle('stop-recording', async () => {
- console.log('stop-recording called');
- return await recorder.stopRecording();
-});
-electron_1.ipcMain.handle('execute-basic-code', async (_, code) => {
- console.log('execute-basic-code called with:', code);
- await player.executeBasicCode(code);
-});
-electron_1.ipcMain.handle('check-microphone-permission', async () => {
- console.log('check-microphone-permission called');
- if (process.platform === 'darwin') {
- const status = await electron_2.systemPreferences.getMediaAccessStatus('microphone');
- if (status !== 'granted') {
- const success = await electron_2.systemPreferences.askForMediaAccess('microphone');
- return success;
- }
- return true;
- }
- return true; // On Windows/Linux, permissions are handled by the OS
-});
// Enable required permissions
electron_1.app.commandLine.appendSwitch('enable-speech-dispatcher');
+// Register cleanup on app quit
+electron_1.app.on('will-quit', cleanupAudioCapture);
+// Function to get the focused window or first available window
+function getFocusedWindow() {
+ const focusedWindow = electron_2.BrowserWindow.getFocusedWindow();
+ if (focusedWindow)
+ return focusedWindow;
+ const windows = electron_2.BrowserWindow.getAllWindows();
+ return windows.length > 0 ? windows[0] : null;
+}
+// Function to safely send to window
+function sendToWindow(channel, ...args) {
+ const window = getFocusedWindow();
+ if (window && !window.isDestroyed()) {
+ window.webContents.send(channel, ...args);
+ }
+}
+async function startMicrophoneCapture(window) {
+ console.log('Starting microphone capture...');
+ try {
+ // Request microphone access
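+        // NOTE: `window` here is a BrowserWindow, and the AudioContext, MediaRecorder,
+        // and FileReader used below are renderer (DOM) APIs that are not available in
+        // the Electron main process; this assumes the capture logic ultimately runs
+        // in a renderer context.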
+ //@ts-ignore
+ const stream = await window.myApi.startMicrophone();
+ audioCapture.audioStream = stream;
+ // Set up audio analysis
+ const audioContext = new (window.AudioContext || window.webkitAudioContext)();
+ const sourceNode = audioContext.createMediaStreamSource(stream);
+ audioCapture.analyserNode = audioContext.createAnalyser();
+ audioCapture.analyserNode.fftSize = 2048;
+ sourceNode.connect(audioCapture.analyserNode);
+ audioCapture.audioData = new Uint8Array(audioCapture.analyserNode.frequencyBinCount);
+ // Set up MediaRecorder
+ audioCapture.mediaRecorder = new MediaRecorder(stream, {
+ mimeType: 'audio/webm;codecs=opus'
+ });
+ // Handle audio data
+ audioCapture.mediaRecorder.ondataavailable = (event) => {
+ if (event.data.size > 0 && !window.isDestroyed()) {
+ // Convert blob to buffer and send to renderer
+ const reader = new FileReader();
+ reader.onloadend = () => {
+ const buffer = Buffer.from(reader.result);
+ window.webContents.send('audio-chunk', buffer);
+ };
+ reader.readAsArrayBuffer(event.data);
+ }
+ };
+ // Start recording
+ audioCapture.mediaRecorder.start(1000); // Capture in 1-second chunks
+ audioCapture.isCapturing = true;
+ // Start audio level monitoring
+ monitorAudioLevels(window);
+ console.log('Microphone capture started successfully');
+ }
+ catch (error) {
+ console.error('Failed to start microphone capture:', error);
+ throw error;
+ }
+}
+function monitorAudioLevels(window) {
+ if (!audioCapture.isCapturing || !audioCapture.analyserNode || !audioCapture.audioData || window.isDestroyed()) {
+ return;
+ }
+ // Get audio level data
+ audioCapture.analyserNode.getByteFrequencyData(audioCapture.audioData);
+ // Calculate average volume level (0-1)
+ const average = audioCapture.audioData.reduce((acc, value) => acc + value, 0) /
+ audioCapture.audioData.length /
+ 255;
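+    // e.g. frequency bytes [0, 128, 255] -> (0 + 128 + 255) / 3 / 255 ≈ 0.50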
+ // Send level to renderer
+ if (!window.isDestroyed()) {
+ window.webContents.send('audio-level', average);
+ }
+ // Continue monitoring
+ requestAnimationFrame(() => monitorAudioLevels(window));
+}
+function stopMicrophoneCapture(window) {
+ console.log('Stopping microphone capture...');
+ try {
+ if (audioCapture.mediaRecorder && audioCapture.mediaRecorder.state !== 'inactive') {
+ audioCapture.mediaRecorder.stop();
+ }
+ if (audioCapture.audioStream) {
+ audioCapture.audioStream.getTracks().forEach(track => track.stop());
+ }
+ if (audioCapture.analyserNode) {
+ audioCapture.analyserNode.disconnect();
+ }
+ audioCapture.isCapturing = false;
+ audioCapture.mediaRecorder = null;
+ audioCapture.audioStream = null;
+ audioCapture.analyserNode = null;
+ audioCapture.audioData = null;
+ if (!window.isDestroyed()) {
+ window.webContents.send('microphone-stopped');
+ }
+ console.log('Microphone capture stopped successfully');
+ }
+ catch (error) {
+ console.error('Failed to stop microphone capture:', error);
+ throw error;
+ }
+}
+// Error handler for audio processing
+function handleAudioError(error, window) {
+ console.error('Audio processing error:', error);
+ stopMicrophoneCapture(window);
+ // Notify renderer of error if window still exists
+ if (!window.isDestroyed()) {
+ window.webContents.send('audio-error', error.message);
+ }
+}
+// Clean up resources when app is closing
+function cleanupAudioCapture() {
+ const window = getFocusedWindow();
+ if (window) {
+ stopMicrophoneCapture(window);
+ }
+}
diff --git a/dist/preload/preload.js b/dist/preload/preload.js
index 158bcee..e718dda 100644
--- a/dist/preload/preload.js
+++ b/dist/preload/preload.js
@@ -1,6 +1,17 @@
const { ipcRenderer } = require('electron');
+const { contextBridge } = require('electron');
+const audioContext = new (window.AudioContext || window.webkitAudioContext)();
//@ts-nocheck
window.myApi = {
+    startMicrophone: () => {
+        if (navigator.mediaDevices) {
+            return navigator.mediaDevices.getUserMedia({ audio: true });
+        }
+        else {
+            console.error("MediaDevices API not supported");
+            return Promise.reject(new Error("MediaDevices API not supported"));
+        }
+    },
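+    // Renderer usage (illustrative):
+    //   const stream = await window.myApi.startMicrophone();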
sendMessage: (message) => {
console.log('[preload] sendMessage called with:', message);
return ipcRenderer.send('message-from-renderer', message);
diff --git a/dist/services/openai.service.js b/dist/services/openai.service.js
index ddd4ff2..2b5df05 100644
--- a/dist/services/openai.service.js
+++ b/dist/services/openai.service.js
@@ -51,7 +51,7 @@ class OpenAIService {
role: 'system',
content: `You are an AI that analyzes screenshots and voice commands to determine user intentions for automation.
You should identify UI elements and return specific actions in JSON format.
- Focus on the area near the cursor position when relevant.`
+ Focus on the area near the field ${context.identifier}.`
},
{
role: 'user',
diff --git a/dist/services/player.service.js b/dist/services/player.service.js
index a795a94..b348cad 100644
--- a/dist/services/player.service.js
+++ b/dist/services/player.service.js
@@ -2,78 +2,86 @@
Object.defineProperty(exports, "__esModule", { value: true });
exports.PlayerService = void 0;
const electron_1 = require("electron");
-const openai_service_1 = require("./openai.service");
+const openai_service_1 = require("../services/openai.service");
class PlayerService {
constructor() {
+ this.currentScreenshot = '';
+ this.isPlaying = false;
console.log('[PlayerService] Initializing');
this.openAIService = new openai_service_1.OpenAIService();
}
async executeBasicCode(code) {
console.log('[PlayerService] executeBasicCode called with:', code);
+ this.isPlaying = true;
const lines = code.split('\n');
- for (const line of lines) {
- if (line.trim().startsWith('REM') || line.trim() === '')
- continue;
- const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/);
- if (!match)
- continue;
- const [_, command, identifier, value] = match;
- console.log('[PlayerService] Executing command:', { command, identifier, value });
- await this.executeCommand(command, identifier, value);
- await new Promise(resolve => setTimeout(resolve, 500));
+ try {
+ for (const line of lines) {
+ if (!this.isPlaying)
+ break;
+ if (line.trim().startsWith('REM') || line.trim() === '')
+ continue;
+ const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/);
+ if (!match)
+ continue;
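+                // e.g. `30 TYPE "username" "alice"` -> command=TYPE,
+                // identifier=username, value=alice (identifiers illustrative)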
+ const [_, command, identifier, value] = match;
+ console.log('[PlayerService] Executing command:', { command, identifier, value });
+ await this.captureAndAnalyzeScreen();
+ await this.executeCommand(command, identifier, value);
+ await new Promise(resolve => setTimeout(resolve, 500));
+ }
}
+    catch (error) {
+        console.error('[PlayerService] Execution error:', error);
+        throw error;
+    }
+    finally {
+        this.isPlaying = false;
+    }
+ }
+    async captureAndAnalyzeScreen() {
+        console.log('[PlayerService] captureAndAnalyzeScreen called');
+        // 'get-screenshot' resolves with a PNG buffer (see main.js), not a source
+        // list; base64 is assumed to be the string form the analyzer expects
+        const screenshot = await electron_1.ipcRenderer.invoke('get-screenshot');
+        this.currentScreenshot = screenshot.toString('base64');
+    }
async executeCommand(command, identifier, value) {
console.log('[PlayerService] executeCommand called with:', { command, identifier, value });
- const screenshotPath = await this.captureScreen();
- console.log('[PlayerService] Screen captured at:', screenshotPath);
- const analysis = await this.openAIService.analyzeScreen(screenshotPath);
- const element = analysis.elements.find(e => e.identifier === identifier);
- if (!element)
- throw new Error(`Element not found: ${identifier}`);
+    let element = await this.openAIService.analyzeScreenWithContext({
+        screenshot: this.currentScreenshot,
+        transcription: '',
+        identifier, cursorPosition: null
+    });
+    if (!element) {
+        console.warn(`[PlayerService] Element not found: ${identifier}, retrying with fresh analysis`);
+        await this.captureAndAnalyzeScreen();
+        element = await this.openAIService.analyzeScreenWithContext({
+            screenshot: this.currentScreenshot,
+            transcription: '',
+            cursorPosition: await electron_1.ipcRenderer.invoke('get-cursor-position'),
+            identifier
+        });
+        if (!element)
+            throw new Error(`Element not found after retry: ${identifier}`);
+    }
const centerX = element.bounds.x + element.bounds.width / 2;
const centerY = element.bounds.y + element.bounds.height / 2;
switch (command) {
case 'CLICK':
console.log('[PlayerService] Simulating click at:', { centerX, centerY });
- await this.simulateClick(centerX, centerY);
+ await electron_1.ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY });
break;
case 'TYPE':
console.log('[PlayerService] Simulating type:', { centerX, centerY, value });
- await this.simulateClick(centerX, centerY);
- await this.simulateTyping(value || '');
+ await electron_1.ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY });
+ await electron_1.ipcRenderer.invoke('simulate-type', { text: value || '' });
+ break;
+ case 'MOVE':
+ console.log('[PlayerService] Simulating move:', { centerX, centerY });
+ await electron_1.ipcRenderer.invoke('simulate-move', { x: centerX, y: centerY });
break;
}
}
- async captureScreen() {
- console.log('[PlayerService] captureScreen called');
- return new Promise((resolve, reject) => {
- electron_1.ipcMain.once('screen-captured', (_, screenshotPath) => {
- console.log('[PlayerService] Screen captured event received:', screenshotPath);
- resolve(screenshotPath);
- });
- electron_1.ipcMain.emit('capture-screen');
- });
- }
- async simulateClick(x, y) {
- console.log('[PlayerService] simulateClick called with:', { x, y });
- return new Promise((resolve) => {
- electron_1.ipcMain.once('click-completed', () => {
- console.log('[PlayerService] Click completed');
- resolve();
- });
- electron_1.ipcMain.emit('simulate-click', { x, y });
- });
- }
- async simulateTyping(text) {
- console.log('[PlayerService] simulateTyping called with:', text);
- return new Promise((resolve) => {
- electron_1.ipcMain.once('typing-completed', () => {
- console.log('[PlayerService] Typing completed');
- resolve();
- });
- electron_1.ipcMain.emit('simulate-typing', { text });
- });
+ stop() {
+ console.log('[PlayerService] Stopping playback');
+ this.isPlaying = false;
}
}
exports.PlayerService = PlayerService;
diff --git a/dist/services/recorder.service.js b/dist/services/recorder.service.js
index 0ac1c5c..263398e 100644
--- a/dist/services/recorder.service.js
+++ b/dist/services/recorder.service.js
@@ -30,57 +30,72 @@ const path = __importStar(require("path"));
const fs = __importStar(require("fs"));
class RecorderService {
constructor() {
- this.events = [];
+ this.eventGroups = [];
+ this.currentEvents = [];
this.recording = false;
this.currentScreenshot = '';
- this.lastTranscription = '';
- this.currentAudioFile = '';
+ this.audioBuffer = [];
+ this.isListeningToMicrophone = false;
this.silenceTimer = null;
this.isProcessingAudio = false;
- this.handleAudioLevel = async (_, level) => {
- console.log('RecorderService.handleAudioLevel()', { level });
- if (!this.recording)
+ this.SILENCE_THRESHOLD = 0.01;
+ this.SILENCE_DURATION = 1500; // 1.5 seconds of silence to trigger processing
+ this.MIN_AUDIO_DURATION = 500; // Minimum audio duration to process
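+        // Flow: audio-level samples below SILENCE_THRESHOLD for SILENCE_DURATION ms
+        // flush the buffered chunks through transcription; any louder sample cancels
+        // the pending flush (see handleAudioLevel).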
+ this.handleAudioLevel = (_, level) => {
+ if (!this.recording || !this.isListeningToMicrophone)
return;
- const SILENCE_THRESHOLD = 0.01;
- const SILENCE_DURATION = 1000;
- if (level < SILENCE_THRESHOLD) {
- if (!this.silenceTimer && !this.isProcessingAudio) {
- console.log('RecorderService.handleAudioLevel() - Setting silence timer');
+ if (level < this.SILENCE_THRESHOLD) {
+ if (!this.silenceTimer && !this.isProcessingAudio && this.audioBuffer.length > 0) {
this.silenceTimer = setTimeout(async () => {
if (this.recording) {
- await this.processSilence();
+ await this.processCapturedAudio();
}
- }, SILENCE_DURATION);
+ }, this.SILENCE_DURATION);
}
}
else {
if (this.silenceTimer) {
- console.log('RecorderService.handleAudioLevel() - Clearing silence timer');
clearTimeout(this.silenceTimer);
this.silenceTimer = null;
}
}
};
- this.handleAudioChunk = async (_, chunk) => {
- console.log('RecorderService.handleAudioChunk()', { chunkSize: chunk.length });
+ this.handleAudioChunk = (_, chunk) => {
+ if (!this.recording || !this.isListeningToMicrophone)
+ return;
+ this.audioBuffer.push(chunk);
+ };
+ this.handleKeyboardEvent = async (_, event) => {
if (!this.recording)
return;
- try {
- const audioFilePath = path.join(this.tempDir, `audio-${Date.now()}.wav`);
- fs.writeFileSync(audioFilePath, chunk);
- if (this.silenceTimer) {
- clearTimeout(this.silenceTimer);
- this.silenceTimer = null;
- await this.processAudioFile(audioFilePath);
- }
- }
- catch (error) {
- console.error('RecorderService.handleAudioChunk() error:', error);
+ this.currentEvents.push({
+ type: 'type',
+ identifier: event.key,
+ value: event.key,
+ timestamp: Date.now(),
+ narration: ''
+ });
+ };
+ this.handleMouseEvent = async (_, event) => {
+ if (!this.recording)
+ return;
+ const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot);
+ const element = this.findElementAtPosition(analysis, event.clientX, event.clientY);
+ if (element) {
+ this.currentEvents.push({
+ type: 'click',
+ identifier: element.identifier,
+ timestamp: Date.now(),
+ narration: ''
+ });
}
};
console.log('RecorderService.constructor()');
this.openAIService = new openai_service_1.OpenAIService();
this.tempDir = path.join(process.cwd(), 'temp_recordings');
+ this.ensureTempDirectory();
+ }
+ ensureTempDirectory() {
if (!fs.existsSync(this.tempDir)) {
fs.mkdirSync(this.tempDir, { recursive: true });
}
@@ -89,10 +104,11 @@ class RecorderService {
console.log('RecorderService.startRecording()');
try {
this.recording = true;
- this.events = [];
- await this.setupAudioRecording();
- await this.requestScreenshot();
- electron_1.ipcRenderer.on('keyboard-event', this.keyboardHandleEvent);
+ this.eventGroups = [];
+ this.currentEvents = [];
+ await this.startMicrophoneCapture();
+ await this.captureInitialScreenshot();
+ this.setupEventListeners();
}
catch (error) {
console.error('RecorderService.startRecording() error:', error);
@@ -100,171 +116,118 @@ class RecorderService {
throw error;
}
}
- async setupAudioRecording() {
- console.log('RecorderService.setupAudioRecording()');
+ async startMicrophoneCapture() {
+ console.log('RecorderService.startMicrophoneCapture()');
try {
+ this.isListeningToMicrophone = true;
electron_1.ipcRenderer.on('audio-level', this.handleAudioLevel);
electron_1.ipcRenderer.on('audio-chunk', this.handleAudioChunk);
+ await electron_1.ipcRenderer.invoke('start-microphone-capture');
}
catch (error) {
- console.error('RecorderService.setupAudioRecording() error:', error);
- throw new Error(`Failed to setup audio recording: ${error.message}`);
+ console.error('Failed to start microphone capture:', error);
+ throw new Error(`Microphone initialization failed: ${error.message}`);
}
}
- async processSilence() {
- console.log('RecorderService.processSilence()');
- if (this.isProcessingAudio)
+ async processCapturedAudio() {
+ if (this.isProcessingAudio || this.audioBuffer.length === 0)
return;
this.isProcessingAudio = true;
+ const combinedBuffer = Buffer.concat(this.audioBuffer);
+ this.audioBuffer = []; // Clear the buffer
try {
- const audioFilePath = await electron_1.ipcRenderer.invoke('save-audio-chunk');
- console.log('RecorderService.processSilence() - Audio saved to:', audioFilePath);
- if (audioFilePath) {
- this.currentAudioFile = audioFilePath;
- await this.processAudioFile(audioFilePath);
- await this.requestScreenshot();
+ const audioFilePath = path.join(this.tempDir, `audio-${Date.now()}.wav`);
+ fs.writeFileSync(audioFilePath, combinedBuffer);
+ const transcription = await this.openAIService.transcribeAudio(new Blob([combinedBuffer], { type: 'audio/wav' }));
+ if (transcription.text.trim()) {
+ await this.processNarrationWithEvents(transcription.text);
}
+ fs.unlinkSync(audioFilePath);
}
catch (error) {
- console.error('RecorderService.processSilence() error:', error);
+ console.error('Audio processing error:', error);
}
finally {
this.isProcessingAudio = false;
}
}
- async processAudioFile(audioFilePath) {
- console.log('RecorderService.processAudioFile()', { audioFilePath });
- try {
- const audioBuffer = fs.readFileSync(audioFilePath);
- const transcription = await this.openAIService.transcribeAudio(new Blob([audioBuffer], { type: 'audio/wav' }));
- console.log('RecorderService.processAudioFile() - Transcription:', transcription);
- if (transcription.text.trim()) {
- await this.processTranscription(transcription);
- }
- fs.unlinkSync(audioFilePath);
- }
- catch (error) {
- console.error('RecorderService.processAudioFile() error:', error);
- }
- }
- async processTranscription(transcription) {
- console.log('RecorderService.processTranscription()', { transcription });
- this.lastTranscription = transcription.text;
- const cursorPosition = await electron_1.ipcRenderer.invoke('get-cursor-position');
- console.log('RecorderService.processTranscription() - Cursor position:', cursorPosition);
- const analysis = await this.openAIService.analyzeScreenWithContext({
+ async processNarrationWithEvents(narration) {
+ if (this.currentEvents.length === 0)
+ return;
+ const eventGroup = {
+ narration,
+ events: [...this.currentEvents],
screenshot: this.currentScreenshot,
- transcription: this.lastTranscription,
- cursorPosition
+ timestamp: Date.now()
+ };
+ this.eventGroups.push(eventGroup);
+ this.currentEvents = []; // Clear current events for next group
+ await this.captureInitialScreenshot(); // Get fresh screenshot for next group
+ }
+ setupEventListeners() {
+ electron_1.ipcRenderer.on('keyboard-event', this.handleKeyboardEvent);
+ electron_1.ipcRenderer.on('mouse-event', this.handleMouseEvent);
+ }
+    async captureInitialScreenshot() {
+        // 'get-screenshot' resolves with a PNG buffer (see main.js), not a source
+        // list; base64 is assumed to be the string form the analyzer expects
+        const screenshot = await electron_1.ipcRenderer.invoke('get-screenshot');
+        this.currentScreenshot = screenshot.toString('base64');
+    }
+ findElementAtPosition(analysis, x, y) {
+ return analysis.elements.find(element => {
+ const bounds = element.bounds;
+ return x >= bounds.x &&
+ x <= bounds.x + bounds.width &&
+ y >= bounds.y &&
+ y <= bounds.y + bounds.height;
});
- console.log('RecorderService.processTranscription() - Screen analysis:', analysis);
- if (analysis) {
- this.events.push({
- type: analysis.type,
- identifier: analysis.identifier,
- value: analysis.value,
- timestamp: Date.now(),
- narration: this.lastTranscription
- });
- }
}
async stopRecording() {
console.log('RecorderService.stopRecording()');
+ // Process any remaining audio
+ if (this.audioBuffer.length > 0) {
+ await this.processCapturedAudio();
+ }
+ this.cleanup();
+ return this.generateBasicCode();
+ }
+ cleanup() {
this.recording = false;
+ this.isListeningToMicrophone = false;
if (this.silenceTimer) {
clearTimeout(this.silenceTimer);
this.silenceTimer = null;
}
electron_1.ipcRenderer.removeListener('audio-level', this.handleAudioLevel);
electron_1.ipcRenderer.removeListener('audio-chunk', this.handleAudioChunk);
- electron_1.ipcRenderer.removeListener('keyboard-event', this.keyboardHandleEvent);
- if (this.currentAudioFile && fs.existsSync(this.currentAudioFile)) {
- fs.unlinkSync(this.currentAudioFile);
- }
- const code = this.generateBasicCode();
- console.log('RecorderService.stopRecording() - Generated code:', code);
- return code;
- }
- async requestScreenshot() {
- console.log('RecorderService.requestScreenshot()');
- try {
- const sources = await electron_1.ipcRenderer.invoke('get-screenshot');
- console.log('RecorderService.requestScreenshot() - Sources:', sources);
- const screenSource = sources[0];
- await this.screenshotHandleEvent(null, screenSource.thumbnail);
- }
- catch (error) {
- console.error('RecorderService.requestScreenshot() error:', error);
- }
- }
- async screenshotHandleEvent(_, screenshot) {
- console.log('RecorderService.screenshotHandleEvent()', { screenshot });
- this.currentScreenshot = screenshot;
- }
- async keyboardHandleEvent(_, event) {
- console.log('RecorderService.keyboardHandleEvent()', { key: event.key });
- if (!this.recording)
- return;
- this.events.push({
- type: 'type',
- identifier: event.key,
- timestamp: Date.now(),
- narration: this.lastTranscription
- });
- }
- async mouseHandleEvent(_, event) {
- console.log('RecorderService.mouseHandleEvent()', { x: event.x, y: event.y });
- if (!this.recording)
- return;
- const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot);
- console.log('RecorderService.mouseHandleEvent() - Screen analysis:', analysis);
- const element = this.findElementAtPosition(analysis, event.x, event.y);
- console.log('RecorderService.mouseHandleEvent() - Found element:', element);
- if (element) {
- this.events.push({
- type: 'click',
- identifier: element.identifier,
- timestamp: Date.now(),
- narration: this.lastTranscription
- });
- }
- }
- findElementAtPosition(analysis, x, y) {
- console.log('RecorderService.findElementAtPosition()', { x, y, analysisElementsCount: analysis.elements.length });
- return analysis.elements.find((element) => {
- const bounds = element.bounds;
- const found = x >= bounds.x &&
- x <= bounds.x + bounds.width &&
- y >= bounds.y &&
- y <= bounds.y + bounds.height;
- if (found) {
- console.log('RecorderService.findElementAtPosition() - Found matching element:', element);
- }
- return found;
+ electron_1.ipcRenderer.removeListener('keyboard-event', this.handleKeyboardEvent);
+ electron_1.ipcRenderer.removeListener('mouse-event', this.handleMouseEvent);
+ // Cleanup temp directory
+ fs.readdirSync(this.tempDir).forEach(file => {
+ fs.unlinkSync(path.join(this.tempDir, file));
});
}
generateBasicCode() {
- console.log('RecorderService.generateBasicCode()', { eventsCount: this.events.length });
let basicCode = '10 REM BotDesktop Automation Script\n';
let lineNumber = 20;
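+        // Sample output (narration and identifiers illustrative):
+        //   10 REM BotDesktop Automation Script
+        //   20 REM click the login button
+        //   30 CLICK "login-button"
+        //   40 END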
- for (const event of this.events) {
- basicCode += `${lineNumber} REM ${event.narration}\n`;
+ this.eventGroups.forEach(group => {
+ basicCode += `${lineNumber} REM ${group.narration}\n`;
lineNumber += 10;
- switch (event.type) {
- case 'click':
- basicCode += `${lineNumber} CLICK "${event.identifier}"\n`;
- break;
- case 'type':
- basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`;
- break;
- case 'move':
- basicCode += `${lineNumber} MOVE "${event.identifier}"\n`;
- break;
- }
- lineNumber += 10;
- }
+ group.events.forEach(event => {
+ switch (event.type) {
+ case 'click':
+ basicCode += `${lineNumber} CLICK "${event.identifier}"\n`;
+ break;
+ case 'type':
+ basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`;
+ break;
+ case 'move':
+ basicCode += `${lineNumber} MOVE "${event.identifier}"\n`;
+ break;
+ }
+ lineNumber += 10;
+ });
+ });
basicCode += `${lineNumber} END\n`;
- console.log('RecorderService.generateBasicCode() - Generated code:', basicCode);
return basicCode;
}
}
diff --git a/src/components/App.tsx b/src/components/App.tsx
index bdf15ed..c6b9bfe 100644
--- a/src/components/App.tsx
+++ b/src/components/App.tsx
@@ -36,15 +36,18 @@ const App: React.FC = () => {
   };
   return (
-    <div className="p-4">
-      <h1 className="text-2xl font-bold mb-4">BotDesktop Automation</h1>
-      <div className="space-x-4 mb-4">
+    <div className="p-4 h-auto">
+      <h1 className="text-2xl font-bold mb-4">General Bots Desktop</h1>
+      <div className="space-x-4 mb-4 h-auto">
        <button className={`px-4 py-2 rounded ${recording ? 'bg-red-500' : 'bg-blue-500'} text-white`} onClick={recording ? handleStopRecording : handleStartRecording}>{recording ? 'Stop Recording' : 'Start Recording'}</button>
        <button className="px-4 py-2 rounded bg-green-500 text-white" onClick={handlePlayback} disabled={!basicCode}>Play Recording</button>
      </div>
-      <div className="mt-4">
+      <div className="mt-4 h-20">
        <h2 className="text-xl font-bold mb-2">Generated BASIC Code:</h2>
-        <pre className="bg-gray-100 p-2 rounded border">{basicCode}</pre>
+        <pre className="h-20 min-h-100 bg-gray-100 p-2 rounded border">{basicCode}</pre>
+      </div>
+      <div className="mb-4">
+        <a href="https://github.com/GeneralBots">General Bots</a>
      </div>
    </div>
  );
 };
diff --git a/src/main/main.ts b/src/main/main.ts
index 6ac3348..48b0818 100644
--- a/src/main/main.ts
+++ b/src/main/main.ts
@@ -1,35 +1,139 @@
require('dotenv').config();
require('electron-require');
-
-import { app, BrowserWindow, desktopCapturer, ipcMain } from 'electron';
+import { app } from 'electron';
+import { BrowserWindow, desktopCapturer, ipcMain } from 'electron';
import * as path from 'path';
import { systemPreferences } from 'electron';
import { RecorderService } from '../services/recorder.service';
import { PlayerService } from '../services/player.service';
+interface AudioCapture {
+ mediaRecorder: MediaRecorder | null;
+ audioStream: MediaStream | null;
+ analyserNode: AnalyserNode | null;
+ audioData: Uint8Array | null;
+ isCapturing: boolean;
+}
+
+const audioCapture: AudioCapture = {
+ mediaRecorder: null,
+ audioStream: null,
+ analyserNode: null,
+ audioData: null,
+ isCapturing: false
+};
+
const recorder = new RecorderService();
const player = new PlayerService();
function createWindow() {
const mainWindow = new BrowserWindow({
- width: 1200,
- height: 800,
-
+ width: 700,
+ height: 500,
+ backgroundColor: "grey",
+ center: true,
+ maximizable: false,
+ thickFrame: true,
+    autoHideMenuBar: true,
webPreferences: {
+ experimentalFeatures: true,
nodeIntegrationInWorker: true,
nodeIntegration: true,
nodeIntegrationInSubFrames: true,
contextIsolation: false,
preload: path.join(__dirname, '../preload/preload.js')
}
+
});
+ mainWindow.setAutoHideMenuBar(true);
+  mainWindow.setMaximizable(false);
+
if (process.env.NODE_ENV === 'development') {
mainWindow.loadURL('http://localhost:8080');
mainWindow.webContents.openDevTools();
} else {
mainWindow.loadFile(path.join(__dirname, '../../src/renderer/index.html'));
- }
+  }
+  ipcMain.handle('mouse-event', recorder.handleMouseEvent.bind(recorder));
+ ipcMain.handle('keyboard-event', recorder.handleKeyboardEvent.bind(recorder));
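+  // NOTE: these ipcMain.handle registrations run on every createWindow() call;
+  // Electron throws if a channel is registered twice, so this assumes a single
+  // main window for the app's lifetime.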
+
+
+ // Handler to capture the entire screen
+ ipcMain.handle('get-screenshot', async () => {
+ console.log('get-screenshot called');
+ const sources = await desktopCapturer.getSources({ types: ['screen'] });
+ const screenSource = sources[0]; // Get the first screen source
+
+ const { thumbnail } = screenSource; // Thumbnail is a native image
+ return thumbnail.toPNG(); // Return the screenshot as PNG buffer
+ });
+
+ ipcMain.handle('start-recording', async () => {
+ console.log('start-recording called');
+ await recorder.startRecording();
+ });
+
+ ipcMain.handle('stop-recording', async () => {
+ console.log('stop-recording called');
+ return await recorder.stopRecording();
+ });
+
+ ipcMain.handle('execute-basic-code', async (_, code: string) => {
+ console.log('execute-basic-code called with:', code);
+ await player.executeBasicCode(code);
+ });
+
+ ipcMain.handle('check-microphone-permission', async () => {
+ console.log('check-microphone-permission called');
+ if (process.platform === 'darwin') {
+ const status = await systemPreferences.getMediaAccessStatus('microphone');
+ if (status !== 'granted') {
+ const success = await systemPreferences.askForMediaAccess('microphone');
+ return success;
+ }
+ return true;
+ }
+ return true; // On Windows/Linux, permissions are handled by the OS
+ });
+
+
+  ipcMain.handle('start-microphone-capture', async (event) => {
+    const window = BrowserWindow.fromWebContents(event.sender);
+    if (!window) {
+      throw new Error('No window found for this request');
+    }
+    try {
+      return await startMicrophoneCapture(window);
+    } catch (error) {
+      console.error("Error during microphone capture:", error);
+      throw error; // Send the error back to the renderer
+    }
+  });
+
+  ipcMain.handle('stop-microphone-capture', async (event) => {
+    const window = BrowserWindow.fromWebContents(event.sender);
+    if (!window) {
+      throw new Error('No window found for this request');
+    }
+    try {
+      return await stopMicrophoneCapture(window);
+    } catch (error) {
+      console.error("Error stopping microphone capture:", error);
+      throw error; // Send the error back to the renderer
+    }
+  });
+
}
app.whenReady().then(createWindow);
@@ -46,47 +150,153 @@ app.on('activate', () => {
}
});
-ipcMain.handle('mouse-event', recorder.mouseHandleEvent.bind(recorder));
-ipcMain.handle('keyboard-event', recorder.keyboardHandleEvent.bind(recorder));
-ipcMain.handle('screenshot-captured', recorder.screenshotHandleEvent.bind(recorder));
-
-// Handler to capture the entire screen
-ipcMain.handle('get-screenshot', async () => {
- console.log('get-screenshot called');
- const sources = await desktopCapturer.getSources({ types: ['screen'] });
- const screenSource = sources[0]; // Get the first screen source
-
- const { thumbnail } = screenSource; // Thumbnail is a native image
- return thumbnail.toPNG(); // Return the screenshot as PNG buffer
-});
-
-ipcMain.handle('start-recording', async () => {
- console.log('start-recording called');
- await recorder.startRecording();
-});
-
-ipcMain.handle('stop-recording', async () => {
- console.log('stop-recording called');
- return await recorder.stopRecording();
-});
-
-ipcMain.handle('execute-basic-code', async (_, code: string) => {
- console.log('execute-basic-code called with:', code);
- await player.executeBasicCode(code);
-});
-
-ipcMain.handle('check-microphone-permission', async () => {
- console.log('check-microphone-permission called');
- if (process.platform === 'darwin') {
- const status = await systemPreferences.getMediaAccessStatus('microphone');
- if (status !== 'granted') {
- const success = await systemPreferences.askForMediaAccess('microphone');
- return success;
- }
- return true;
- }
- return true; // On Windows/Linux, permissions are handled by the OS
-});
-
// Enable required permissions
app.commandLine.appendSwitch('enable-speech-dispatcher');
+
+
+// Register cleanup on app quit
+app.on('will-quit', cleanupAudioCapture);
+
+
+
+// Function to get the focused window or first available window
+function getFocusedWindow(): BrowserWindow | null {
+ const focusedWindow = BrowserWindow.getFocusedWindow();
+ if (focusedWindow) return focusedWindow;
+
+ const windows = BrowserWindow.getAllWindows();
+ return windows.length > 0 ? windows[0] : null;
+}
+
+// Function to safely send to window
+function sendToWindow(channel: string, ...args: any[]) {
+ const window = getFocusedWindow();
+ if (window && !window.isDestroyed()) {
+ window.webContents.send(channel, ...args);
+ }
+}
+async function startMicrophoneCapture(window: BrowserWindow): Promise<void> {
+ console.log('Starting microphone capture...');
+
+ try {
+
+ // Request microphone access
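+    // NOTE: `window` here is a BrowserWindow, and the AudioContext, MediaRecorder,
+    // and FileReader used below are renderer (DOM) APIs that are not available in
+    // the Electron main process; this sketch assumes the capture logic ultimately
+    // runs in a renderer context.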
+ //@ts-ignore
+    const stream = await window.myApi.startMicrophone();
+
+ audioCapture.audioStream = stream;
+
+ // Set up audio analysis
+ const audioContext = new ((window as any).AudioContext || (window as any).webkitAudioContext)();
+ const sourceNode = audioContext.createMediaStreamSource(stream);
+ audioCapture.analyserNode = audioContext.createAnalyser();
+ audioCapture.analyserNode.fftSize = 2048;
+
+ sourceNode.connect(audioCapture.analyserNode);
+ audioCapture.audioData = new Uint8Array(audioCapture.analyserNode.frequencyBinCount);
+
+ // Set up MediaRecorder
+ audioCapture.mediaRecorder = new MediaRecorder(stream, {
+ mimeType: 'audio/webm;codecs=opus'
+ });
+
+ // Handle audio data
+ audioCapture.mediaRecorder.ondataavailable = (event: BlobEvent) => {
+ if (event.data.size > 0 && !window.isDestroyed()) {
+ // Convert blob to buffer and send to renderer
+ const reader = new FileReader();
+ reader.onloadend = () => {
+ const buffer = Buffer.from(reader.result as ArrayBuffer);
+ window.webContents.send('audio-chunk', buffer);
+ };
+ reader.readAsArrayBuffer(event.data);
+ }
+ };
+
+ // Start recording
+ audioCapture.mediaRecorder.start(1000); // Capture in 1-second chunks
+ audioCapture.isCapturing = true;
+
+ // Start audio level monitoring
+ monitorAudioLevels(window);
+
+ console.log('Microphone capture started successfully');
+ } catch (error) {
+ console.error('Failed to start microphone capture:', error);
+ throw error;
+ }
+}
+
+function monitorAudioLevels(window: BrowserWindow) {
+ if (!audioCapture.isCapturing || !audioCapture.analyserNode || !audioCapture.audioData || window.isDestroyed()) {
+ return;
+ }
+
+ // Get audio level data
+ audioCapture.analyserNode.getByteFrequencyData(audioCapture.audioData);
+
+ // Calculate average volume level (0-1)
+ const average = audioCapture.audioData.reduce((acc, value) => acc + value, 0) /
+ audioCapture.audioData.length /
+ 255;
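+  // e.g. frequency bytes [0, 128, 255] -> (0 + 128 + 255) / 3 / 255 ≈ 0.50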
+
+ // Send level to renderer
+ if (!window.isDestroyed()) {
+ window.webContents.send('audio-level', average);
+ }
+
+ // Continue monitoring
+ requestAnimationFrame(() => monitorAudioLevels(window));
+}
+
+function stopMicrophoneCapture(window: BrowserWindow) {
+ console.log('Stopping microphone capture...');
+
+ try {
+ if (audioCapture.mediaRecorder && audioCapture.mediaRecorder.state !== 'inactive') {
+ audioCapture.mediaRecorder.stop();
+ }
+
+ if (audioCapture.audioStream) {
+ audioCapture.audioStream.getTracks().forEach(track => track.stop());
+ }
+
+ if (audioCapture.analyserNode) {
+ audioCapture.analyserNode.disconnect();
+ }
+
+ audioCapture.isCapturing = false;
+ audioCapture.mediaRecorder = null;
+ audioCapture.audioStream = null;
+ audioCapture.analyserNode = null;
+ audioCapture.audioData = null;
+
+ if (!window.isDestroyed()) {
+ window.webContents.send('microphone-stopped');
+ }
+
+ console.log('Microphone capture stopped successfully');
+ } catch (error) {
+ console.error('Failed to stop microphone capture:', error);
+ throw error;
+ }
+}
+
+// Error handler for audio processing
+function handleAudioError(error: Error, window: BrowserWindow): void {
+ console.error('Audio processing error:', error);
+ stopMicrophoneCapture(window);
+
+ // Notify renderer of error if window still exists
+ if (!window.isDestroyed()) {
+ window.webContents.send('audio-error', error.message);
+ }
+}
+
+// Clean up resources when app is closing
+export function cleanupAudioCapture(): void {
+ const window = getFocusedWindow();
+ if (window) {
+ stopMicrophoneCapture(window);
+ }
+}
diff --git a/src/preload/preload.ts b/src/preload/preload.ts
index ffed7db..9613b28 100644
--- a/src/preload/preload.ts
+++ b/src/preload/preload.ts
@@ -1,7 +1,20 @@
const { ipcRenderer } = require('electron');
+const { contextBridge } = require('electron');
+
+const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
+
//@ts-nocheck
(window as any).myApi = {
+
+  startMicrophone: () => {
+    if (navigator.mediaDevices) {
+      return navigator.mediaDevices.getUserMedia({ audio: true });
+    } else {
+      console.error("MediaDevices API not supported");
+      return Promise.reject(new Error("MediaDevices API not supported"));
+    }
+  },
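+  // Renderer usage (illustrative):
+  //   const stream = await (window as any).myApi.startMicrophone();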
sendMessage: (message: any) => {
console.log('[preload] sendMessage called with:', message);
return ipcRenderer.send('message-from-renderer', message);
@@ -11,3 +24,4 @@ const { ipcRenderer } = require('electron');
return ipcRenderer.on('message-from-main', (event, arg) => callback(arg));
},
};
+
diff --git a/src/renderer/index.html b/src/renderer/index.html
index ef787dc..5065acd 100644
--- a/src/renderer/index.html
+++ b/src/renderer/index.html
@@ -2,7 +2,7 @@
-    <title>BotDesktop</title>
+    <title>General Bots Desktop</title>
diff --git a/src/services/openai.service.ts b/src/services/openai.service.ts
index 53dedb1..e19ee5e 100644
--- a/src/services/openai.service.ts
+++ b/src/services/openai.service.ts
@@ -56,7 +56,7 @@ export class OpenAIService {
role: 'system',
content: `You are an AI that analyzes screenshots and voice commands to determine user intentions for automation.
You should identify UI elements and return specific actions in JSON format.
- Focus on the area near the cursor position when relevant.`
+ Focus on the area near the field ${context.identifier}.`
},
{
role: 'user',
diff --git a/src/services/player.service.ts b/src/services/player.service.ts
index 98074d5..09ace25 100644
--- a/src/services/player.service.ts
+++ b/src/services/player.service.ts
@@ -1,9 +1,20 @@
-import { ipcMain } from 'electron';
-import { AutomationEvent, ScreenAnalysis } from './types';
-import { OpenAIService } from './openai.service';
+import { ipcRenderer, ipcMain } from 'electron';
+import { AutomationEvent, ScreenAnalysis, WhisperResponse } from '../services/types';
+import { OpenAIService } from '../services/openai.service';
+import * as path from 'path';
+import * as fs from 'fs';
+
+interface EventGroup {
+ narration: string;
+ events: AutomationEvent[];
+ screenshot: string;
+ timestamp: number;
+}
export class PlayerService {
private openAIService: OpenAIService;
+ private currentScreenshot: string = '';
+ private isPlaying: boolean = false;
constructor() {
console.log('[PlayerService] Initializing');
@@ -12,31 +23,60 @@ export class PlayerService {
async executeBasicCode(code: string) {
console.log('[PlayerService] executeBasicCode called with:', code);
+ this.isPlaying = true;
const lines = code.split('\n');
- for (const line of lines) {
- if (line.trim().startsWith('REM') || line.trim() === '') continue;
-
- const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/);
- if (!match) continue;
+ try {
+ for (const line of lines) {
+ if (!this.isPlaying) break;
+ if (line.trim().startsWith('REM') || line.trim() === '') continue;
+
+ const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/);
+ if (!match) continue;
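+        // e.g. `30 TYPE "username" "alice"` -> command=TYPE, identifier=username,
+        // value=alice (identifiers illustrative)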
- const [_, command, identifier, value] = match;
- console.log('[PlayerService] Executing command:', { command, identifier, value });
- await this.executeCommand(command, identifier, value);
- await new Promise(resolve => setTimeout(resolve, 500));
+ const [_, command, identifier, value] = match;
+ console.log('[PlayerService] Executing command:', { command, identifier, value });
+
+ await this.captureAndAnalyzeScreen();
+ await this.executeCommand(command, identifier, value);
+ await new Promise(resolve => setTimeout(resolve, 500));
+ }
+    } catch (error) {
+      console.error('[PlayerService] Execution error:', error);
+      throw error;
+    } finally {
+      this.isPlaying = false;
+    }
}
+  private async captureAndAnalyzeScreen() {
+    console.log('[PlayerService] captureAndAnalyzeScreen called');
+    // 'get-screenshot' resolves with a PNG buffer (see main.ts), not a source
+    // list; base64 is assumed to be the string form the analyzer expects
+    const screenshot: Buffer = await ipcRenderer.invoke('get-screenshot');
+    this.currentScreenshot = screenshot.toString('base64');
+  }
+
private async executeCommand(command: string, identifier: string, value?: string) {
console.log('[PlayerService] executeCommand called with:', { command, identifier, value });
- const screenshotPath = await this.captureScreen();
- console.log('[PlayerService] Screen captured at:', screenshotPath);
+    let element = await this.openAIService.analyzeScreenWithContext({
+      screenshot: this.currentScreenshot,
+      transcription: '',
+      identifier, cursorPosition: null
+    });
+
-    const analysis = await this.openAIService.analyzeScreen(screenshotPath);
-    const element = analysis.elements.find(e => e.identifier === identifier);
-
-    if (!element) throw new Error(`Element not found: ${identifier}`);
+    if (!element) {
+      console.warn(`[PlayerService] Element not found: ${identifier}, retrying with fresh analysis`);
+      await this.captureAndAnalyzeScreen();
+      element = await this.openAIService.analyzeScreenWithContext({
+        screenshot: this.currentScreenshot,
+        transcription: '',
+        cursorPosition: await ipcRenderer.invoke('get-cursor-position'),
+        identifier
+      });
+
+      if (!element) throw new Error(`Element not found after retry: ${identifier}`);
+    }
const centerX = element.bounds.x + element.bounds.width/2;
const centerY = element.bounds.y + element.bounds.height/2;
@@ -44,49 +84,22 @@ export class PlayerService {
switch (command) {
case 'CLICK':
console.log('[PlayerService] Simulating click at:', { centerX, centerY });
- await this.simulateClick(centerX, centerY);
+ await ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY });
break;
case 'TYPE':
console.log('[PlayerService] Simulating type:', { centerX, centerY, value });
- await this.simulateClick(centerX, centerY);
- await this.simulateTyping(value || '');
+ await ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY });
+ await ipcRenderer.invoke('simulate-type', { text: value || '' });
+ break;
+ case 'MOVE':
+ console.log('[PlayerService] Simulating move:', { centerX, centerY });
+ await ipcRenderer.invoke('simulate-move', { x: centerX, y: centerY });
break;
}
}
-  private async captureScreen(): Promise<string> {
- console.log('[PlayerService] captureScreen called');
- return new Promise((resolve, reject) => {
- ipcMain.once('screen-captured', (_, screenshotPath) => {
- console.log('[PlayerService] Screen captured event received:', screenshotPath);
- resolve(screenshotPath);
- });
-
- ipcMain.emit('capture-screen');
- });
+ public stop() {
+ console.log('[PlayerService] Stopping playback');
+ this.isPlaying = false;
}
-
-  private async simulateClick(x: number, y: number): Promise<void> {
- console.log('[PlayerService] simulateClick called with:', { x, y });
- return new Promise((resolve) => {
- ipcMain.once('click-completed', () => {
- console.log('[PlayerService] Click completed');
- resolve();
- });
-
- ipcMain.emit('simulate-click', { x, y });
- });
- }
-
-  private async simulateTyping(text: string): Promise<void> {
- console.log('[PlayerService] simulateTyping called with:', text);
- return new Promise((resolve) => {
- ipcMain.once('typing-completed', () => {
- console.log('[PlayerService] Typing completed');
- resolve();
- });
-
- ipcMain.emit('simulate-typing', { text });
- });
- }
-}
+}
\ No newline at end of file
diff --git a/src/services/recorder.service.ts b/src/services/recorder.service.ts
index 46e45e2..7655012 100644
--- a/src/services/recorder.service.ts
+++ b/src/services/recorder.service.ts
@@ -1,24 +1,32 @@
import { ipcRenderer } from 'electron';
-import { AutomationEvent, ScreenAnalysis, WhisperResponse } from '../services/types';
+import { AutomationEvent, EventGroup, ScreenAnalysis, WhisperResponse } from '../services/types';
import { OpenAIService } from '../services/openai.service';
import * as path from 'path';
import * as fs from 'fs';
export class RecorderService {
- private events: AutomationEvent[] = [];
+ private eventGroups: EventGroup[] = [];
+ private currentEvents: AutomationEvent[] = [];
private recording: boolean = false;
private openAIService: OpenAIService;
private currentScreenshot: string = '';
- private lastTranscription: string = '';
- private currentAudioFile: string = '';
+ private audioBuffer: Buffer[] = [];
+ private isListeningToMicrophone: boolean = false;
private silenceTimer: NodeJS.Timeout | null = null;
private isProcessingAudio: boolean = false;
private tempDir: string;
+ private SILENCE_THRESHOLD = 0.01;
+ private SILENCE_DURATION = 1500; // 1.5 seconds of silence to trigger processing
+ private MIN_AUDIO_DURATION = 500; // Minimum audio duration to process
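+  // Flow: audio-level samples below SILENCE_THRESHOLD for SILENCE_DURATION ms
+  // flush the buffered chunks through transcription; any louder sample cancels
+  // the pending flush (see handleAudioLevel).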
constructor() {
console.log('RecorderService.constructor()');
this.openAIService = new OpenAIService();
this.tempDir = path.join(process.cwd(), 'temp_recordings');
+ this.ensureTempDirectory();
+ }
+
+ private ensureTempDirectory() {
if (!fs.existsSync(this.tempDir)) {
fs.mkdirSync(this.tempDir, { recursive: true });
}
@@ -28,10 +36,11 @@ export class RecorderService {
console.log('RecorderService.startRecording()');
try {
this.recording = true;
- this.events = [];
- await this.setupAudioRecording();
- await this.requestScreenshot();
- ipcRenderer.on('keyboard-event', this.keyboardHandleEvent);
+ this.eventGroups = [];
+ this.currentEvents = [];
+ await this.startMicrophoneCapture();
+ await this.captureInitialScreenshot();
+ this.setupEventListeners();
} catch (error) {
console.error('RecorderService.startRecording() error:', error);
this.recording = false;
@@ -39,127 +48,148 @@ export class RecorderService {
}
}
- private async setupAudioRecording() {
- console.log('RecorderService.setupAudioRecording()');
+ private async startMicrophoneCapture() {
+ console.log('RecorderService.startMicrophoneCapture()');
try {
+ this.isListeningToMicrophone = true;
ipcRenderer.on('audio-level', this.handleAudioLevel);
ipcRenderer.on('audio-chunk', this.handleAudioChunk);
+ await ipcRenderer.invoke('start-microphone-capture');
} catch (error) {
- console.error('RecorderService.setupAudioRecording() error:', error);
- throw new Error(`Failed to setup audio recording: ${error.message}`);
+ console.error('Failed to start microphone capture:', error);
+ throw new Error(`Microphone initialization failed: ${error.message}`);
}
}
- private handleAudioLevel = async (_: any, level: number) => {
- console.log('RecorderService.handleAudioLevel()', { level });
- if (!this.recording) return;
+ public handleAudioLevel = (_: any, level: number) => {
+ if (!this.recording || !this.isListeningToMicrophone) return;
- const SILENCE_THRESHOLD = 0.01;
- const SILENCE_DURATION = 1000;
-
- if (level < SILENCE_THRESHOLD) {
- if (!this.silenceTimer && !this.isProcessingAudio) {
- console.log('RecorderService.handleAudioLevel() - Setting silence timer');
+ if (level < this.SILENCE_THRESHOLD) {
+ if (!this.silenceTimer && !this.isProcessingAudio && this.audioBuffer.length > 0) {
this.silenceTimer = setTimeout(async () => {
if (this.recording) {
- await this.processSilence();
+ await this.processCapturedAudio();
}
- }, SILENCE_DURATION);
+ }, this.SILENCE_DURATION);
}
} else {
if (this.silenceTimer) {
- console.log('RecorderService.handleAudioLevel() - Clearing silence timer');
clearTimeout(this.silenceTimer);
this.silenceTimer = null;
}
}
}
- private handleAudioChunk = async (_: any, chunk: Buffer) => {
- console.log('RecorderService.handleAudioChunk()', { chunkSize: chunk.length });
- if (!this.recording) return;
+ public handleAudioChunk = (_: any, chunk: Buffer) => {
+ if (!this.recording || !this.isListeningToMicrophone) return;
+ this.audioBuffer.push(chunk);
+ }
+
+ private async processCapturedAudio() {
+ if (this.isProcessingAudio || this.audioBuffer.length === 0) return;
+
+ this.isProcessingAudio = true;
+ const combinedBuffer = Buffer.concat(this.audioBuffer);
+ this.audioBuffer = []; // Clear the buffer
try {
const audioFilePath = path.join(this.tempDir, `audio-${Date.now()}.wav`);
- fs.writeFileSync(audioFilePath, chunk);
+ fs.writeFileSync(audioFilePath, combinedBuffer);
- if (this.silenceTimer) {
- clearTimeout(this.silenceTimer);
- this.silenceTimer = null;
- await this.processAudioFile(audioFilePath);
- }
- } catch (error) {
- console.error('RecorderService.handleAudioChunk() error:', error);
- }
- };
+ const transcription = await this.openAIService.transcribeAudio(
+ new Blob([combinedBuffer], { type: 'audio/wav' })
+ );
- private async processSilence() {
- console.log('RecorderService.processSilence()');
- if (this.isProcessingAudio) return;
-
- this.isProcessingAudio = true;
- try {
- const audioFilePath = await ipcRenderer.invoke('save-audio-chunk');
- console.log('RecorderService.processSilence() - Audio saved to:', audioFilePath);
- if (audioFilePath) {
- this.currentAudioFile = audioFilePath;
- await this.processAudioFile(audioFilePath);
- await this.requestScreenshot();
+ if (transcription.text.trim()) {
+ await this.processNarrationWithEvents(transcription.text);
}
+
+ fs.unlinkSync(audioFilePath);
} catch (error) {
- console.error('RecorderService.processSilence() error:', error);
+ console.error('Audio processing error:', error);
} finally {
this.isProcessingAudio = false;
}
}
- private async processAudioFile(audioFilePath: string) {
- console.log('RecorderService.processAudioFile()', { audioFilePath });
- try {
- const audioBuffer = fs.readFileSync(audioFilePath);
- const transcription = await this.openAIService.transcribeAudio(
- new Blob([audioBuffer], { type: 'audio/wav' })
- );
- console.log('RecorderService.processAudioFile() - Transcription:', transcription);
+ private async processNarrationWithEvents(narration: string) {
+ if (this.currentEvents.length === 0) return;
- if (transcription.text.trim()) {
- await this.processTranscription(transcription);
- }
+ const eventGroup: EventGroup = {
+ narration,
+ events: [...this.currentEvents],
+ screenshot: this.currentScreenshot,
+ timestamp: Date.now()
+ };
- fs.unlinkSync(audioFilePath);
- } catch (error) {
- console.error('RecorderService.processAudioFile() error:', error);
- }
+ this.eventGroups.push(eventGroup);
+ this.currentEvents = []; // Clear current events for next group
+ await this.captureInitialScreenshot(); // Get fresh screenshot for next group
}
- private async processTranscription(transcription: WhisperResponse) {
- console.log('RecorderService.processTranscription()', { transcription });
- this.lastTranscription = transcription.text;
-
- const cursorPosition = await ipcRenderer.invoke('get-cursor-position');
- console.log('RecorderService.processTranscription() - Cursor position:', cursorPosition);
-
- const analysis = await this.openAIService.analyzeScreenWithContext({
- screenshot: this.currentScreenshot,
- transcription: this.lastTranscription,
- cursorPosition
- });
- console.log('RecorderService.processTranscription() - Screen analysis:', analysis);
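+ // Subscribe to keyboard and mouse events forwarded over IPC.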
+ private setupEventListeners() {
+ ipcRenderer.on('keyboard-event', this.handleKeyboardEvent);
+ ipcRenderer.on('mouse-event', this.handleMouseEvent);
+ }
- if (analysis) {
- this.events.push({
- type: analysis.type,
- identifier: analysis.identifier,
- value: analysis.value,
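+ // Cache the current screen thumbnail; handleMouseEvent analyzes it to
+ // resolve clicks to UI elements.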
+ private async captureInitialScreenshot() {
+ const sources = await ipcRenderer.invoke('get-screenshot');
+ this.currentScreenshot = sources[0].thumbnail;
+ }
+
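+ // Record each keystroke as a 'type' event; narration is attached at the
+ // group level when processNarrationWithEvents() closes the group.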
+ public handleKeyboardEvent = async (_: any, event: KeyboardEvent) => {
+ if (!this.recording) return;
+
+ this.currentEvents.push({
+ type: 'type',
+ identifier: event.key,
+ value: event.key,
+ timestamp: Date.now(),
+ narration: ''
+ });
+ }
+
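+ // Resolve a click to a UI element by analyzing the cached screenshot.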
+ public handleMouseEvent = async (_: any, event: MouseEvent) => {
+ if (!this.recording) return;
+
+ const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot);
+ const element = this.findElementAtPosition(analysis, event.clientX, event.clientY);
+
+ if (element) {
+ this.currentEvents.push({
+ type: 'click',
+ identifier: element.identifier,
timestamp: Date.now(),
- narration: this.lastTranscription
+ narration: ''
});
}
}
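+ // Hit-test: return the first analyzed element whose bounds contain (x, y).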
+ private findElementAtPosition(analysis: ScreenAnalysis, x: number, y: number) {
+ return analysis.elements.find(element => {
+ const bounds = element.bounds;
+ return x >= bounds.x &&
+ x <= bounds.x + bounds.width &&
+ y >= bounds.y &&
+ y <= bounds.y + bounds.height;
+ });
+ }
+
public async stopRecording(): Promise<string> {
console.log('RecorderService.stopRecording()');
+
+ // Process any remaining audio
+ if (this.audioBuffer.length > 0) {
+ await this.processCapturedAudio();
+ }
+
+ this.cleanup();
+ return this.generateBasicCode();
+ }
+
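+ // Stop listening, detach IPC handlers, and purge temp audio files.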
+ private cleanup() {
this.recording = false;
+ this.isListeningToMicrophone = false;
if (this.silenceTimer) {
clearTimeout(this.silenceTimer);
@@ -168,106 +198,40 @@ export class RecorderService {
ipcRenderer.removeListener('audio-level', this.handleAudioLevel);
ipcRenderer.removeListener('audio-chunk', this.handleAudioChunk);
- ipcRenderer.removeListener('keyboard-event', this.keyboardHandleEvent);
+ ipcRenderer.removeListener('keyboard-event', this.handleKeyboardEvent);
+ ipcRenderer.removeListener('mouse-event', this.handleMouseEvent);
- if (this.currentAudioFile && fs.existsSync(this.currentAudioFile)) {
- fs.unlinkSync(this.currentAudioFile);
- }
-
- const code = this.generateBasicCode();
- console.log('RecorderService.stopRecording() - Generated code:', code);
- return code;
- }
-
- private async requestScreenshot() {
- console.log('RecorderService.requestScreenshot()');
- try {
- const sources = await ipcRenderer.invoke('get-screenshot');
- console.log('RecorderService.requestScreenshot() - Sources:', sources);
- const screenSource = sources[0];
- await this.screenshotHandleEvent(null, screenSource.thumbnail);
- } catch (error) {
- console.error('RecorderService.requestScreenshot() error:', error);
- }
- }
-
- public async screenshotHandleEvent(_: any, screenshot: string) {
- console.log('RecorderService.screenshotHandleEvent()', { screenshot });
- this.currentScreenshot = screenshot;
- }
-
- public async keyboardHandleEvent(_: any, event: KeyboardEvent) {
- console.log('RecorderService.keyboardHandleEvent()', { key: event.key });
- if (!this.recording) return;
-
- this.events.push({
- type: 'type',
- identifier: event.key,
- timestamp: Date.now(),
- narration: this.lastTranscription
- });
- }
-
- public async mouseHandleEvent(_: any, event: any) {
- console.log('RecorderService.mouseHandleEvent()', { x: event.x, y: event.y });
- if (!this.recording) return;
-
- const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot);
- console.log('RecorderService.mouseHandleEvent() - Screen analysis:', analysis);
-
- const element = this.findElementAtPosition(analysis, event.x, event.y);
- console.log('RecorderService.mouseHandleEvent() - Found element:', element);
-
- if (element) {
- this.events.push({
- type: 'click',
- identifier: element.identifier,
- timestamp: Date.now(),
- narration: this.lastTranscription
- });
- }
- }
-
- private findElementAtPosition(analysis: ScreenAnalysis, x: number, y: number) {
- console.log('RecorderService.findElementAtPosition()', { x, y, analysisElementsCount: analysis.elements.length });
- return analysis.elements.find((element) => {
- const bounds = element.bounds;
- const found = x >= bounds.x &&
- x <= bounds.x + bounds.width &&
- y >= bounds.y &&
- y <= bounds.y + bounds.height;
- if (found) {
- console.log('RecorderService.findElementAtPosition() - Found matching element:', element);
- }
- return found;
+ // Clean up the temp directory: delete any remaining audio files
+ fs.readdirSync(this.tempDir).forEach(file => {
+ fs.unlinkSync(path.join(this.tempDir, file));
});
}
private generateBasicCode(): string {
- console.log('RecorderService.generateBasicCode()', { eventsCount: this.events.length });
let basicCode = '10 REM BotDesktop Automation Script\n';
let lineNumber = 20;
- for (const event of this.events) {
- basicCode += `${lineNumber} REM ${event.narration}\n`;
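+ // Emit one REM header per narration group, then a numbered statement
+ // for each event inside the group.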
+ this.eventGroups.forEach(group => {
+ basicCode += `${lineNumber} REM ${group.narration}\n`;
lineNumber += 10;
- switch (event.type) {
- case 'click':
- basicCode += `${lineNumber} CLICK "${event.identifier}"\n`;
- break;
- case 'type':
- basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`;
- break;
- case 'move':
- basicCode += `${lineNumber} MOVE "${event.identifier}"\n`;
- break;
- }
- lineNumber += 10;
- }
+ group.events.forEach(event => {
+ switch (event.type) {
+ case 'click':
+ basicCode += `${lineNumber} CLICK "${event.identifier}"\n`;
+ break;
+ case 'type':
+ basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`;
+ break;
+ case 'move':
+ basicCode += `${lineNumber} MOVE "${event.identifier}"\n`;
+ break;
+ }
+ lineNumber += 10;
+ });
+ });
basicCode += `${lineNumber} END\n`;
- console.log('RecorderService.generateBasicCode() - Generated code:', basicCode);
return basicCode;
}
}
\ No newline at end of file
diff --git a/src/services/types.ts b/src/services/types.ts
index 97eca85..951649e 100644
--- a/src/services/types.ts
+++ b/src/services/types.ts
@@ -1,3 +1,22 @@
+
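+// A playback command: the keyword plus its arguments, with an optional description.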
+export interface PlaybackEvent {
+ command: string;
+ args: string[];
+ description?: string;
+}
+
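+// A UI element located by screen analysis, with its bounds in screen coordinates.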
+export interface ScreenElement {
+ identifier: string;
+ bounds: {
+ x: number;
+ y: number;
+ width: number;
+ height: number;
+ };
+ windowName: string;
+ value?: string;
+}
+
export interface AutomationAction {
type: 'click' | 'type' | 'move';
identifier: string;
@@ -12,6 +31,12 @@ export interface AutomationAction {
}
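+// The input events recorded during one narration segment, plus screenshot context.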
+export interface EventGroup {
+ narration: string;
+ events: AutomationEvent[];
+ screenshot: string;
+ timestamp: number;
+}
export interface AutomationEvent {
type: 'click' | 'type' | 'move';
@@ -30,6 +55,7 @@ export interface ScreenContext {
screenshot: string;
transcription: string;
cursorPosition: { x: number, y: number };
+ identifier: string;
}
export interface ScreenAnalysis {