new(all): Improvements in IPC and MainWindow.

me@rodrigorodriguez.com 2024-10-27 13:07:05 -03:00
parent c6f0933bfd
commit 19220010c1
14 changed files with 927 additions and 537 deletions

View file

@@ -55,13 +55,15 @@ const App = () => {
console.error('Playback error:', error);
}
};
return (react_1.default.createElement("div", { className: "p-4" },
react_1.default.createElement("h1", { className: "text-2xl font-bold mb-4" }, "BotDesktop Automation"),
react_1.default.createElement("div", { className: "space-x-4 mb-4" },
return (react_1.default.createElement("div", { className: "p-4 h-auto" },
react_1.default.createElement("h1", { className: "text-2xl font-bold mb-4" }, "General Bots Desktop"),
react_1.default.createElement("div", { className: "space-x-4 mb-4 h-auto" },
react_1.default.createElement("button", { className: `px-4 py-2 rounded ${recording ? 'bg-red-500' : 'bg-blue-500'} text-white`, onClick: recording ? handleStopRecording : handleStartRecording }, recording ? 'Stop Recording' : 'Start Recording'),
react_1.default.createElement("button", { className: "px-4 py-2 rounded bg-green-500 text-white", onClick: handlePlayback, disabled: !basicCode }, "Play Recording")),
react_1.default.createElement("div", { className: "mt-4" },
react_1.default.createElement("div", { className: "mt-4 h-20" },
react_1.default.createElement("h2", { className: "text-xl font-bold mb-2" }, "Generated BASIC Code:"),
react_1.default.createElement("pre", { className: "bg-gray-100 p-2 rounded border" }, basicCode))));
react_1.default.createElement("pre", { className: "h-20 min-h-100 bg-gray-100 p-2 rounded border" }, basicCode)),
react_1.default.createElement("div", { className: "mb-4" },
react_1.default.createElement("a", { href: "https://github.com/General Bots" }, "General Bots"))));
};
exports.default = App;
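The start/stop/playback handlers above drive everything over IPC. A minimal sketch of what the renderer side presumably looks like, assuming direct ipcRenderer access (nodeIntegration is enabled for this window) and the channel names registered in main.js; the function names are illustrative only:

import { ipcRenderer } from 'electron';

// Hypothetical renderer-side handlers; the channel names match the
// ipcMain.handle registrations in dist/main/main.js below.
async function handleStartRecording(): Promise<void> {
    await ipcRenderer.invoke('start-recording');
}

async function handleStopRecording(): Promise<string> {
    // Resolves with the BASIC code generated by RecorderService.
    return ipcRenderer.invoke('stop-recording');
}

async function handlePlayback(basicCode: string): Promise<void> {
    await ipcRenderer.invoke('execute-basic-code', basicCode);
}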

dist/main/main.js vendored
View file

@@ -23,20 +23,35 @@ var __importStar = (this && this.__importStar) || function (mod) {
return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.cleanupAudioCapture = cleanupAudioCapture;
require('dotenv').config();
require('electron-require');
const electron_1 = require("electron");
const path = __importStar(require("path"));
const electron_2 = require("electron");
const path = __importStar(require("path"));
const electron_3 = require("electron");
const recorder_service_1 = require("../services/recorder.service");
const player_service_1 = require("../services/player.service");
const audioCapture = {
mediaRecorder: null,
audioStream: null,
analyserNode: null,
audioData: null,
isCapturing: false
};
const recorder = new recorder_service_1.RecorderService();
const player = new player_service_1.PlayerService();
function createWindow() {
const mainWindow = new electron_1.BrowserWindow({
width: 1200,
height: 800,
const mainWindow = new electron_2.BrowserWindow({
width: 700,
height: 500,
backgroundColor: "grey",
center: true,
maximizable: false,
thickFrame: true,
autoHideMenuBar: true,
webPreferences: {
experimentalFeatures: true,
nodeIntegrationInWorker: true,
nodeIntegration: true,
nodeIntegrationInSubFrames: true,
@@ -44,6 +59,8 @@ function createWindow() {
preload: path.join(__dirname, '../preload/preload.js')
}
});
mainWindow.setAutoHideMenuBar(true);
mainWindow.setMaximizable(false);
if (process.env.NODE_ENV === 'development') {
mainWindow.loadURL('http://localhost:8080');
mainWindow.webContents.openDevTools();
@@ -51,6 +68,76 @@ function createWindow() {
else {
mainWindow.loadFile(path.join(__dirname, '../../src/renderer/index.html'));
}
electron_2.ipcMain.handle('mouse-event', recorder.handleMouseEvent.bind(recorder));
electron_2.ipcMain.handle('keyboard-event', recorder.handleKeyboardEvent.bind(recorder));
// Handler to capture the entire screen
electron_2.ipcMain.handle('get-screenshot', async () => {
console.log('get-screenshot called');
const sources = await electron_2.desktopCapturer.getSources({ types: ['screen'] });
const screenSource = sources[0]; // Get the first screen source
const { thumbnail } = screenSource; // Thumbnail is a native image
return thumbnail.toPNG(); // Return the screenshot as PNG buffer
});
electron_2.ipcMain.handle('start-recording', async () => {
console.log('start-recording called');
await recorder.startRecording();
});
electron_2.ipcMain.handle('stop-recording', async () => {
console.log('stop-recording called');
return await recorder.stopRecording();
});
electron_2.ipcMain.handle('execute-basic-code', async (_, code) => {
console.log('execute-basic-code called with:', code);
await player.executeBasicCode(code);
});
electron_2.ipcMain.handle('check-microphone-permission', async () => {
console.log('check-microphone-permission called');
if (process.platform === 'darwin') {
const status = await electron_3.systemPreferences.getMediaAccessStatus('microphone');
if (status !== 'granted') {
const success = await electron_3.systemPreferences.askForMediaAccess('microphone');
return success;
}
return true;
}
return true; // On Windows/Linux, permissions are handled by the OS
});
electron_2.ipcMain.handle('start-microphone-capture', async (event) => {
    const window = electron_2.BrowserWindow.fromWebContents(event.sender);
    if (!window) {
        throw new Error('No window found for this request');
    }
    try {
        return await startMicrophoneCapture(window);
    }
    catch (error) {
        console.error("Error during microphone capture:", error);
        throw error; // Send the error back to the renderer
    }
});
electron_2.ipcMain.handle('stop-microphone-capture', async (event) => {
    const window = electron_2.BrowserWindow.fromWebContents(event.sender);
    if (!window) {
        throw new Error('No window found for this request');
    }
    try {
        return await stopMicrophoneCapture(window);
    }
    catch (error) {
        console.error("Error stopping microphone capture:", error);
        throw error; // Send the error back to the renderer
    }
});
}
electron_1.app.whenReady().then(createWindow);
electron_1.app.on('window-all-closed', () => {
@@ -59,44 +146,128 @@ electron_1.app.on('window-all-closed', () => {
}
});
electron_1.app.on('activate', () => {
if (electron_1.BrowserWindow.getAllWindows().length === 0) {
if (electron_2.BrowserWindow.getAllWindows().length === 0) {
createWindow();
}
});
electron_1.ipcMain.handle('mouse-event', recorder.mouseHandleEvent.bind(recorder));
electron_1.ipcMain.handle('keyboard-event', recorder.keyboardHandleEvent.bind(recorder));
electron_1.ipcMain.handle('screenshot-captured', recorder.screenshotHandleEvent.bind(recorder));
// Handler to capture the entire screen
electron_1.ipcMain.handle('get-screenshot', async () => {
console.log('get-screenshot called');
const sources = await electron_1.desktopCapturer.getSources({ types: ['screen'] });
const screenSource = sources[0]; // Get the first screen source
const { thumbnail } = screenSource; // Thumbnail is a native image
return thumbnail.toPNG(); // Return the screenshot as PNG buffer
});
electron_1.ipcMain.handle('start-recording', async () => {
console.log('start-recording called');
await recorder.startRecording();
});
electron_1.ipcMain.handle('stop-recording', async () => {
console.log('stop-recording called');
return await recorder.stopRecording();
});
electron_1.ipcMain.handle('execute-basic-code', async (_, code) => {
console.log('execute-basic-code called with:', code);
await player.executeBasicCode(code);
});
electron_1.ipcMain.handle('check-microphone-permission', async () => {
console.log('check-microphone-permission called');
if (process.platform === 'darwin') {
const status = await electron_2.systemPreferences.getMediaAccessStatus('microphone');
if (status !== 'granted') {
const success = await electron_2.systemPreferences.askForMediaAccess('microphone');
return success;
}
return true;
}
return true; // On Windows/Linux, permissions are handled by the OS
});
// Enable required permissions
electron_1.app.commandLine.appendSwitch('enable-speech-dispatcher');
// Register cleanup on app quit
electron_1.app.on('will-quit', cleanupAudioCapture);
// Function to get the focused window or first available window
function getFocusedWindow() {
const focusedWindow = electron_2.BrowserWindow.getFocusedWindow();
if (focusedWindow)
return focusedWindow;
const windows = electron_2.BrowserWindow.getAllWindows();
return windows.length > 0 ? windows[0] : null;
}
// Function to safely send to window
function sendToWindow(channel, ...args) {
const window = getFocusedWindow();
if (window && !window.isDestroyed()) {
window.webContents.send(channel, ...args);
}
}
async function startMicrophoneCapture(window) {
console.log('Starting microphone capture...');
try {
// Request microphone access
//@ts-ignore
const stream = await window.myApi.startMicrophone();
audioCapture.audioStream = stream;
// Set up audio analysis
const audioContext = new (window.AudioContext || window.webkitAudioContext)();
const sourceNode = audioContext.createMediaStreamSource(stream);
audioCapture.analyserNode = audioContext.createAnalyser();
audioCapture.analyserNode.fftSize = 2048;
sourceNode.connect(audioCapture.analyserNode);
audioCapture.audioData = new Uint8Array(audioCapture.analyserNode.frequencyBinCount);
// Set up MediaRecorder
audioCapture.mediaRecorder = new MediaRecorder(stream, {
mimeType: 'audio/webm;codecs=opus'
});
// Handle audio data
audioCapture.mediaRecorder.ondataavailable = (event) => {
if (event.data.size > 0 && !window.isDestroyed()) {
// Convert blob to buffer and send to renderer
const reader = new FileReader();
reader.onloadend = () => {
const buffer = Buffer.from(reader.result);
window.webContents.send('audio-chunk', buffer);
};
reader.readAsArrayBuffer(event.data);
}
};
// Start recording
audioCapture.mediaRecorder.start(1000); // Capture in 1-second chunks
audioCapture.isCapturing = true;
// Start audio level monitoring
monitorAudioLevels(window);
console.log('Microphone capture started successfully');
}
catch (error) {
console.error('Failed to start microphone capture:', error);
throw error;
}
}
function monitorAudioLevels(window) {
if (!audioCapture.isCapturing || !audioCapture.analyserNode || !audioCapture.audioData || window.isDestroyed()) {
return;
}
// Get audio level data
audioCapture.analyserNode.getByteFrequencyData(audioCapture.audioData);
// Calculate average volume level (0-1)
const average = audioCapture.audioData.reduce((acc, value) => acc + value, 0) /
audioCapture.audioData.length /
255;
// Send level to renderer
if (!window.isDestroyed()) {
window.webContents.send('audio-level', average);
}
// Continue monitoring
requestAnimationFrame(() => monitorAudioLevels(window));
}
function stopMicrophoneCapture(window) {
console.log('Stopping microphone capture...');
try {
if (audioCapture.mediaRecorder && audioCapture.mediaRecorder.state !== 'inactive') {
audioCapture.mediaRecorder.stop();
}
if (audioCapture.audioStream) {
audioCapture.audioStream.getTracks().forEach(track => track.stop());
}
if (audioCapture.analyserNode) {
audioCapture.analyserNode.disconnect();
}
audioCapture.isCapturing = false;
audioCapture.mediaRecorder = null;
audioCapture.audioStream = null;
audioCapture.analyserNode = null;
audioCapture.audioData = null;
if (!window.isDestroyed()) {
window.webContents.send('microphone-stopped');
}
console.log('Microphone capture stopped successfully');
}
catch (error) {
console.error('Failed to stop microphone capture:', error);
throw error;
}
}
// Error handler for audio processing
function handleAudioError(error, window) {
console.error('Audio processing error:', error);
stopMicrophoneCapture(window);
// Notify renderer of error if window still exists
if (!window.isDestroyed()) {
window.webContents.send('audio-error', error.message);
}
}
// Clean up resources when app is closing
function cleanupAudioCapture() {
const window = getFocusedWindow();
if (window) {
stopMicrophoneCapture(window);
}
}
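startMicrophoneCapture streams results back to the renderer on four channels: 'audio-chunk' (1-second WebM/Opus buffers), 'audio-level' (a 0-1 average of the analyser's frequency bins), 'microphone-stopped', and 'audio-error'. A minimal sketch of a renderer-side consumer, assuming direct ipcRenderer access:

import { ipcRenderer } from 'electron';

// Collect the 1-second audio chunks forwarded from the main process.
const chunks: Buffer[] = [];
ipcRenderer.on('audio-chunk', (_event, chunk: Buffer) => {
    chunks.push(chunk);
});

// Level arrives normalized to 0-1; 0.01 is the same threshold
// RecorderService uses for silence detection.
ipcRenderer.on('audio-level', (_event, level: number) => {
    if (level < 0.01) {
        console.log('near silence');
    }
});

ipcRenderer.on('microphone-stopped', () => console.log('capture ended'));
ipcRenderer.on('audio-error', (_event, message: string) => console.error(message));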

View file

@@ -1,6 +1,17 @@
const { ipcRenderer } = require('electron');
const { contextBridge } = require('electron');
const audioContext = new (window.AudioContext || window.webkitAudioContext)();
//@ts-nocheck
window.myApi = {
startMicrophone: () => {
if (navigator.mediaDevices) {
return navigator.mediaDevices.getUserMedia({ audio: true });
}
else {
console.error("MediaDevices API not supported");
}
},
sendMessage: (message) => {
console.log('[preload] sendMessage called with:', message);
return ipcRenderer.send('message-from-renderer', message);
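startMicrophone above resolves to a MediaStream (or undefined when the MediaDevices API is missing). A hedged sketch of how renderer code could consume it, mirroring the 1-second chunking used in main.js:

// Hypothetical consumer of the window.myApi bridge defined above.
(async () => {
    const stream = await (window as any).myApi.startMicrophone();
    if (stream) {
        const recorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' });
        recorder.ondataavailable = (event) => console.log('chunk bytes:', event.data.size);
        recorder.start(1000); // 1-second chunks, matching startMicrophoneCapture
    }
})();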

View file

@@ -51,7 +51,7 @@ class OpenAIService {
role: 'system',
content: `You are an AI that analyzes screenshots and voice commands to determine user intentions for automation.
You should identify UI elements and return specific actions in JSON format.
Focus on the area near the cursor position when relevant.`
Focus on the area near the field ${context.identifier}.`
},
{
role: 'user',
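The system prompt asks the model to return actions in JSON, but the schema is not shown in this hunk. Judging from the AutomationEvent interface in services/types.ts, a reply presumably resembles the following (field names inferred, not confirmed by this diff):

// Hypothetical example of an analysis result for a TYPE action.
const exampleAction = {
    type: 'type',               // 'click' | 'type' | 'move'
    identifier: 'search-field', // the UI element the model located
    value: 'dark mode'          // only meaningful for TYPE
};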

View file

@@ -2,78 +2,86 @@
Object.defineProperty(exports, "__esModule", { value: true });
exports.PlayerService = void 0;
const electron_1 = require("electron");
const openai_service_1 = require("./openai.service");
const openai_service_1 = require("../services/openai.service");
class PlayerService {
constructor() {
this.currentScreenshot = '';
this.isPlaying = false;
console.log('[PlayerService] Initializing');
this.openAIService = new openai_service_1.OpenAIService();
}
async executeBasicCode(code) {
console.log('[PlayerService] executeBasicCode called with:', code);
this.isPlaying = true;
const lines = code.split('\n');
for (const line of lines) {
if (line.trim().startsWith('REM') || line.trim() === '')
continue;
const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/);
if (!match)
continue;
const [_, command, identifier, value] = match;
console.log('[PlayerService] Executing command:', { command, identifier, value });
await this.executeCommand(command, identifier, value);
await new Promise(resolve => setTimeout(resolve, 500));
try {
for (const line of lines) {
if (!this.isPlaying)
break;
if (line.trim().startsWith('REM') || line.trim() === '')
continue;
const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/);
if (!match)
continue;
const [_, command, identifier, value] = match;
console.log('[PlayerService] Executing command:', { command, identifier, value });
await this.captureAndAnalyzeScreen();
await this.executeCommand(command, identifier, value);
await new Promise(resolve => setTimeout(resolve, 500));
}
}
catch (error) {
console.error('[PlayerService] Execution error:', error);
this.isPlaying = false;
throw error;
}
}
async captureAndAnalyzeScreen() {
console.log('[PlayerService] captureAndAnalyzeScreen called');
const sources = await electron_1.ipcRenderer.invoke('get-screenshot');
this.currentScreenshot = sources[0].thumbnail;
}
async executeCommand(command, identifier, value) {
console.log('[PlayerService] executeCommand called with:', { command, identifier, value });
const screenshotPath = await this.captureScreen();
console.log('[PlayerService] Screen captured at:', screenshotPath);
const analysis = await this.openAIService.analyzeScreen(screenshotPath);
const element = analysis.elements.find(e => e.identifier === identifier);
if (!element)
throw new Error(`Element not found: ${identifier}`);
let element = await this.openAIService.analyzeScreenWithContext({
screenshot: this.currentScreenshot,
transcription: '',
identifier, cursorPosition: null
});
//@ts-nocheck
if (!element) {
console.warn(`[PlayerService] Element not found: ${identifier}, retrying with fresh analysis`);
await this.captureAndAnalyzeScreen();
const newElement = await this.openAIService.analyzeScreenWithContext({
screenshot: this.currentScreenshot,
transcription: '',
cursorPosition: await electron_1.ipcRenderer.invoke('get-cursor-position'),
identifier
});
if (!newElement)
    throw new Error(`Element not found after retry: ${identifier}`);
element = newElement;
}
const centerX = element.bounds.x + element.bounds.width / 2;
const centerY = element.bounds.y + element.bounds.height / 2;
switch (command) {
case 'CLICK':
console.log('[PlayerService] Simulating click at:', { centerX, centerY });
await this.simulateClick(centerX, centerY);
await electron_1.ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY });
break;
case 'TYPE':
console.log('[PlayerService] Simulating type:', { centerX, centerY, value });
await this.simulateClick(centerX, centerY);
await this.simulateTyping(value || '');
await electron_1.ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY });
await electron_1.ipcRenderer.invoke('simulate-type', { text: value || '' });
break;
case 'MOVE':
console.log('[PlayerService] Simulating move:', { centerX, centerY });
await electron_1.ipcRenderer.invoke('simulate-move', { x: centerX, y: centerY });
break;
}
}
async captureScreen() {
console.log('[PlayerService] captureScreen called');
return new Promise((resolve, reject) => {
electron_1.ipcMain.once('screen-captured', (_, screenshotPath) => {
console.log('[PlayerService] Screen captured event received:', screenshotPath);
resolve(screenshotPath);
});
electron_1.ipcMain.emit('capture-screen');
});
}
async simulateClick(x, y) {
console.log('[PlayerService] simulateClick called with:', { x, y });
return new Promise((resolve) => {
electron_1.ipcMain.once('click-completed', () => {
console.log('[PlayerService] Click completed');
resolve();
});
electron_1.ipcMain.emit('simulate-click', { x, y });
});
}
async simulateTyping(text) {
console.log('[PlayerService] simulateTyping called with:', text);
return new Promise((resolve) => {
electron_1.ipcMain.once('typing-completed', () => {
console.log('[PlayerService] Typing completed');
resolve();
});
electron_1.ipcMain.emit('simulate-typing', { text });
});
stop() {
console.log('[PlayerService] Stopping playback');
this.isPlaying = false;
}
}
exports.PlayerService = PlayerService;
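Each script line is parsed with /^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/ — a line number, a command word, a quoted identifier, and an optional quoted value. A quick worked example:

const line = '30 TYPE "search-field" "hello world"';
const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/);
// match[1] === 'TYPE'
// match[2] === 'search-field'
// match[3] === 'hello world'  (undefined for CLICK/MOVE lines)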

View file

@@ -30,57 +30,72 @@ const path = __importStar(require("path"));
const fs = __importStar(require("fs"));
class RecorderService {
constructor() {
this.events = [];
this.eventGroups = [];
this.currentEvents = [];
this.recording = false;
this.currentScreenshot = '';
this.lastTranscription = '';
this.currentAudioFile = '';
this.audioBuffer = [];
this.isListeningToMicrophone = false;
this.silenceTimer = null;
this.isProcessingAudio = false;
this.handleAudioLevel = async (_, level) => {
console.log('RecorderService.handleAudioLevel()', { level });
if (!this.recording)
this.SILENCE_THRESHOLD = 0.01;
this.SILENCE_DURATION = 1500; // 1.5 seconds of silence to trigger processing
this.MIN_AUDIO_DURATION = 500; // Minimum audio duration to process
this.handleAudioLevel = (_, level) => {
if (!this.recording || !this.isListeningToMicrophone)
return;
const SILENCE_THRESHOLD = 0.01;
const SILENCE_DURATION = 1000;
if (level < SILENCE_THRESHOLD) {
if (!this.silenceTimer && !this.isProcessingAudio) {
console.log('RecorderService.handleAudioLevel() - Setting silence timer');
if (level < this.SILENCE_THRESHOLD) {
if (!this.silenceTimer && !this.isProcessingAudio && this.audioBuffer.length > 0) {
this.silenceTimer = setTimeout(async () => {
if (this.recording) {
await this.processSilence();
await this.processCapturedAudio();
}
}, SILENCE_DURATION);
}, this.SILENCE_DURATION);
}
}
else {
if (this.silenceTimer) {
console.log('RecorderService.handleAudioLevel() - Clearing silence timer');
clearTimeout(this.silenceTimer);
this.silenceTimer = null;
}
}
};
this.handleAudioChunk = async (_, chunk) => {
console.log('RecorderService.handleAudioChunk()', { chunkSize: chunk.length });
this.handleAudioChunk = (_, chunk) => {
if (!this.recording || !this.isListeningToMicrophone)
return;
this.audioBuffer.push(chunk);
};
this.handleKeyboardEvent = async (_, event) => {
if (!this.recording)
return;
try {
const audioFilePath = path.join(this.tempDir, `audio-${Date.now()}.wav`);
fs.writeFileSync(audioFilePath, chunk);
if (this.silenceTimer) {
clearTimeout(this.silenceTimer);
this.silenceTimer = null;
await this.processAudioFile(audioFilePath);
}
}
catch (error) {
console.error('RecorderService.handleAudioChunk() error:', error);
this.currentEvents.push({
type: 'type',
identifier: event.key,
value: event.key,
timestamp: Date.now(),
narration: ''
});
};
this.handleMouseEvent = async (_, event) => {
if (!this.recording)
return;
const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot);
const element = this.findElementAtPosition(analysis, event.clientX, event.clientY);
if (element) {
this.currentEvents.push({
type: 'click',
identifier: element.identifier,
timestamp: Date.now(),
narration: ''
});
}
};
console.log('RecorderService.constructor()');
this.openAIService = new openai_service_1.OpenAIService();
this.tempDir = path.join(process.cwd(), 'temp_recordings');
this.ensureTempDirectory();
}
ensureTempDirectory() {
if (!fs.existsSync(this.tempDir)) {
fs.mkdirSync(this.tempDir, { recursive: true });
}
@@ -89,10 +104,11 @@ class RecorderService {
console.log('RecorderService.startRecording()');
try {
this.recording = true;
this.events = [];
await this.setupAudioRecording();
await this.requestScreenshot();
electron_1.ipcRenderer.on('keyboard-event', this.keyboardHandleEvent);
this.eventGroups = [];
this.currentEvents = [];
await this.startMicrophoneCapture();
await this.captureInitialScreenshot();
this.setupEventListeners();
}
catch (error) {
console.error('RecorderService.startRecording() error:', error);
@@ -100,171 +116,118 @@ class RecorderService {
throw error;
}
}
async setupAudioRecording() {
console.log('RecorderService.setupAudioRecording()');
async startMicrophoneCapture() {
console.log('RecorderService.startMicrophoneCapture()');
try {
this.isListeningToMicrophone = true;
electron_1.ipcRenderer.on('audio-level', this.handleAudioLevel);
electron_1.ipcRenderer.on('audio-chunk', this.handleAudioChunk);
await electron_1.ipcRenderer.invoke('start-microphone-capture');
}
catch (error) {
console.error('RecorderService.setupAudioRecording() error:', error);
throw new Error(`Failed to setup audio recording: ${error.message}`);
console.error('Failed to start microphone capture:', error);
throw new Error(`Microphone initialization failed: ${error.message}`);
}
}
async processSilence() {
console.log('RecorderService.processSilence()');
if (this.isProcessingAudio)
async processCapturedAudio() {
if (this.isProcessingAudio || this.audioBuffer.length === 0)
return;
this.isProcessingAudio = true;
const combinedBuffer = Buffer.concat(this.audioBuffer);
this.audioBuffer = []; // Clear the buffer
try {
const audioFilePath = await electron_1.ipcRenderer.invoke('save-audio-chunk');
console.log('RecorderService.processSilence() - Audio saved to:', audioFilePath);
if (audioFilePath) {
this.currentAudioFile = audioFilePath;
await this.processAudioFile(audioFilePath);
await this.requestScreenshot();
const audioFilePath = path.join(this.tempDir, `audio-${Date.now()}.wav`);
fs.writeFileSync(audioFilePath, combinedBuffer);
const transcription = await this.openAIService.transcribeAudio(new Blob([combinedBuffer], { type: 'audio/wav' }));
if (transcription.text.trim()) {
await this.processNarrationWithEvents(transcription.text);
}
fs.unlinkSync(audioFilePath);
}
catch (error) {
console.error('RecorderService.processSilence() error:', error);
console.error('Audio processing error:', error);
}
finally {
this.isProcessingAudio = false;
}
}
async processAudioFile(audioFilePath) {
console.log('RecorderService.processAudioFile()', { audioFilePath });
try {
const audioBuffer = fs.readFileSync(audioFilePath);
const transcription = await this.openAIService.transcribeAudio(new Blob([audioBuffer], { type: 'audio/wav' }));
console.log('RecorderService.processAudioFile() - Transcription:', transcription);
if (transcription.text.trim()) {
await this.processTranscription(transcription);
}
fs.unlinkSync(audioFilePath);
}
catch (error) {
console.error('RecorderService.processAudioFile() error:', error);
}
}
async processTranscription(transcription) {
console.log('RecorderService.processTranscription()', { transcription });
this.lastTranscription = transcription.text;
const cursorPosition = await electron_1.ipcRenderer.invoke('get-cursor-position');
console.log('RecorderService.processTranscription() - Cursor position:', cursorPosition);
const analysis = await this.openAIService.analyzeScreenWithContext({
async processNarrationWithEvents(narration) {
if (this.currentEvents.length === 0)
return;
const eventGroup = {
narration,
events: [...this.currentEvents],
screenshot: this.currentScreenshot,
transcription: this.lastTranscription,
cursorPosition
timestamp: Date.now()
};
this.eventGroups.push(eventGroup);
this.currentEvents = []; // Clear current events for next group
await this.captureInitialScreenshot(); // Get fresh screenshot for next group
}
setupEventListeners() {
electron_1.ipcRenderer.on('keyboard-event', this.handleKeyboardEvent);
electron_1.ipcRenderer.on('mouse-event', this.handleMouseEvent);
}
async captureInitialScreenshot() {
const sources = await electron_1.ipcRenderer.invoke('get-screenshot');
this.currentScreenshot = sources[0].thumbnail;
}
findElementAtPosition(analysis, x, y) {
return analysis.elements.find(element => {
const bounds = element.bounds;
return x >= bounds.x &&
x <= bounds.x + bounds.width &&
y >= bounds.y &&
y <= bounds.y + bounds.height;
});
console.log('RecorderService.processTranscription() - Screen analysis:', analysis);
if (analysis) {
this.events.push({
type: analysis.type,
identifier: analysis.identifier,
value: analysis.value,
timestamp: Date.now(),
narration: this.lastTranscription
});
}
}
async stopRecording() {
console.log('RecorderService.stopRecording()');
// Process any remaining audio
if (this.audioBuffer.length > 0) {
await this.processCapturedAudio();
}
this.cleanup();
return this.generateBasicCode();
}
cleanup() {
this.recording = false;
this.isListeningToMicrophone = false;
if (this.silenceTimer) {
clearTimeout(this.silenceTimer);
this.silenceTimer = null;
}
electron_1.ipcRenderer.removeListener('audio-level', this.handleAudioLevel);
electron_1.ipcRenderer.removeListener('audio-chunk', this.handleAudioChunk);
electron_1.ipcRenderer.removeListener('keyboard-event', this.keyboardHandleEvent);
if (this.currentAudioFile && fs.existsSync(this.currentAudioFile)) {
fs.unlinkSync(this.currentAudioFile);
}
const code = this.generateBasicCode();
console.log('RecorderService.stopRecording() - Generated code:', code);
return code;
}
async requestScreenshot() {
console.log('RecorderService.requestScreenshot()');
try {
const sources = await electron_1.ipcRenderer.invoke('get-screenshot');
console.log('RecorderService.requestScreenshot() - Sources:', sources);
const screenSource = sources[0];
await this.screenshotHandleEvent(null, screenSource.thumbnail);
}
catch (error) {
console.error('RecorderService.requestScreenshot() error:', error);
}
}
async screenshotHandleEvent(_, screenshot) {
console.log('RecorderService.screenshotHandleEvent()', { screenshot });
this.currentScreenshot = screenshot;
}
async keyboardHandleEvent(_, event) {
console.log('RecorderService.keyboardHandleEvent()', { key: event.key });
if (!this.recording)
return;
this.events.push({
type: 'type',
identifier: event.key,
timestamp: Date.now(),
narration: this.lastTranscription
});
}
async mouseHandleEvent(_, event) {
console.log('RecorderService.mouseHandleEvent()', { x: event.x, y: event.y });
if (!this.recording)
return;
const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot);
console.log('RecorderService.mouseHandleEvent() - Screen analysis:', analysis);
const element = this.findElementAtPosition(analysis, event.x, event.y);
console.log('RecorderService.mouseHandleEvent() - Found element:', element);
if (element) {
this.events.push({
type: 'click',
identifier: element.identifier,
timestamp: Date.now(),
narration: this.lastTranscription
});
}
}
findElementAtPosition(analysis, x, y) {
console.log('RecorderService.findElementAtPosition()', { x, y, analysisElementsCount: analysis.elements.length });
return analysis.elements.find((element) => {
const bounds = element.bounds;
const found = x >= bounds.x &&
x <= bounds.x + bounds.width &&
y >= bounds.y &&
y <= bounds.y + bounds.height;
if (found) {
console.log('RecorderService.findElementAtPosition() - Found matching element:', element);
}
return found;
electron_1.ipcRenderer.removeListener('keyboard-event', this.handleKeyboardEvent);
electron_1.ipcRenderer.removeListener('mouse-event', this.handleMouseEvent);
// Cleanup temp directory
fs.readdirSync(this.tempDir).forEach(file => {
fs.unlinkSync(path.join(this.tempDir, file));
});
}
generateBasicCode() {
console.log('RecorderService.generateBasicCode()', { eventsCount: this.events.length });
let basicCode = '10 REM BotDesktop Automation Script\n';
let lineNumber = 20;
for (const event of this.events) {
basicCode += `${lineNumber} REM ${event.narration}\n`;
this.eventGroups.forEach(group => {
basicCode += `${lineNumber} REM ${group.narration}\n`;
lineNumber += 10;
switch (event.type) {
case 'click':
basicCode += `${lineNumber} CLICK "${event.identifier}"\n`;
break;
case 'type':
basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`;
break;
case 'move':
basicCode += `${lineNumber} MOVE "${event.identifier}"\n`;
break;
}
lineNumber += 10;
}
group.events.forEach(event => {
switch (event.type) {
case 'click':
basicCode += `${lineNumber} CLICK "${event.identifier}"\n`;
break;
case 'type':
basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`;
break;
case 'move':
basicCode += `${lineNumber} MOVE "${event.identifier}"\n`;
break;
}
lineNumber += 10;
});
});
basicCode += `${lineNumber} END\n`;
console.log('RecorderService.generateBasicCode() - Generated code:', basicCode);
return basicCode;
}
}
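generateBasicCode emits one REM line per narration group followed by that group's commands, stepping line numbers by 10. A hypothetical two-group recording would produce:

10 REM BotDesktop Automation Script
20 REM Open the settings page
30 CLICK "settings-button"
40 REM Search for dark mode
50 TYPE "search-field" "dark mode"
60 END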

View file

@@ -36,10 +36,10 @@ const App: React.FC = () => {
};
return (
<div className="p-4">
<h1 className="text-2xl font-bold mb-4">BotDesktop Automation</h1>
<div className="p-4 h-auto">
<h1 className="text-2xl font-bold mb-4">General Bots Desktop</h1>
<div className="space-x-4 mb-4">
<div className="space-x-4 mb-4 h-auto">
<button
className={`px-4 py-2 rounded ${recording ? 'bg-red-500' : 'bg-blue-500'} text-white`}
onClick={recording ? handleStopRecording : handleStartRecording}
@@ -56,11 +56,19 @@ const App: React.FC = () => {
</button>
</div>
<div className="mt-4">
<div className="mt-4 h-20">
<h2 className="text-xl font-bold mb-2">Generated BASIC Code:</h2>
<pre className="bg-gray-100 p-2 rounded border">{basicCode}</pre>
<pre className="h-20 min-h-100 bg-gray-100 p-2 rounded border">{basicCode}</pre>
</div>
<div className="mb-4">
<a href="https://github.com/GeneralBots">General Bots</a>
</div>
</div>
);
};

View file

@@ -1,35 +1,139 @@
require('dotenv').config();
require('electron-require');
import { app, BrowserWindow, desktopCapturer, ipcMain } from 'electron';
import { app } from 'electron';
import { BrowserWindow, desktopCapturer, ipcMain } from 'electron';
import * as path from 'path';
import { systemPreferences } from 'electron';
import { RecorderService } from '../services/recorder.service';
import { PlayerService } from '../services/player.service';
interface AudioCapture {
mediaRecorder: MediaRecorder | null;
audioStream: MediaStream | null;
analyserNode: AnalyserNode | null;
audioData: Uint8Array | null;
isCapturing: boolean;
}
const audioCapture: AudioCapture = {
mediaRecorder: null,
audioStream: null,
analyserNode: null,
audioData: null,
isCapturing: false
};
const recorder = new RecorderService();
const player = new PlayerService();
function createWindow() {
const mainWindow = new BrowserWindow({
width: 1200,
height: 800,
width: 700,
height: 500,
backgroundColor: "grey",
center: true,
maximizable: false,
thickFrame: true,
autoHideMenuBar: true,
webPreferences: {
experimentalFeatures: true,
nodeIntegrationInWorker: true,
nodeIntegration: true,
nodeIntegrationInSubFrames: true,
contextIsolation: false,
preload: path.join(__dirname, '../preload/preload.js')
}
});
mainWindow.setAutoHideMenuBar(true);
mainWindow.setMaximizable(false);
if (process.env.NODE_ENV === 'development') {
mainWindow.loadURL('http://localhost:8080');
mainWindow.webContents.openDevTools();
} else {
mainWindow.loadFile(path.join(__dirname, '../../src/renderer/index.html'));
}
}
ipcMain.handle('mouse-event', recorder.handleMouseEvent.bind(recorder));
ipcMain.handle('keyboard-event', recorder.handleKeyboardEvent.bind(recorder));
// Handler to capture the entire screen
ipcMain.handle('get-screenshot', async () => {
console.log('get-screenshot called');
const sources = await desktopCapturer.getSources({ types: ['screen'] });
const screenSource = sources[0]; // Get the first screen source
const { thumbnail } = screenSource; // Thumbnail is a native image
return thumbnail.toPNG(); // Return the screenshot as PNG buffer
});
ipcMain.handle('start-recording', async () => {
console.log('start-recording called');
await recorder.startRecording();
});
ipcMain.handle('stop-recording', async () => {
console.log('stop-recording called');
return await recorder.stopRecording();
});
ipcMain.handle('execute-basic-code', async (_, code: string) => {
console.log('execute-basic-code called with:', code);
await player.executeBasicCode(code);
});
ipcMain.handle('check-microphone-permission', async () => {
console.log('check-microphone-permission called');
if (process.platform === 'darwin') {
const status = await systemPreferences.getMediaAccessStatus('microphone');
if (status !== 'granted') {
const success = await systemPreferences.askForMediaAccess('microphone');
return success;
}
return true;
}
return true; // On Windows/Linux, permissions are handled by the OS
});
ipcMain.handle('start-microphone-capture', async (event) => {
    const window = BrowserWindow.fromWebContents(event.sender);
    if (!window) {
        throw new Error('No window found for this request');
    }
    try {
        return await startMicrophoneCapture(window);
    } catch (error) {
        console.error("Error during microphone capture:", error);
        throw error; // Send the error back to the renderer
    }
});
ipcMain.handle('stop-microphone-capture', async (event) => {
    const window = BrowserWindow.fromWebContents(event.sender);
    if (!window) {
        throw new Error('No window found for this request');
    }
    try {
        return await stopMicrophoneCapture(window);
    } catch (error) {
        console.error("Error stopping microphone capture:", error);
        throw error; // Send the error back to the renderer
    }
});
}
app.whenReady().then(createWindow);
@@ -46,47 +150,153 @@ app.on('activate', () => {
}
});
ipcMain.handle('mouse-event', recorder.mouseHandleEvent.bind(recorder));
ipcMain.handle('keyboard-event', recorder.keyboardHandleEvent.bind(recorder));
ipcMain.handle('screenshot-captured', recorder.screenshotHandleEvent.bind(recorder));
// Handler to capture the entire screen
ipcMain.handle('get-screenshot', async () => {
console.log('get-screenshot called');
const sources = await desktopCapturer.getSources({ types: ['screen'] });
const screenSource = sources[0]; // Get the first screen source
const { thumbnail } = screenSource; // Thumbnail is a native image
return thumbnail.toPNG(); // Return the screenshot as PNG buffer
});
ipcMain.handle('start-recording', async () => {
console.log('start-recording called');
await recorder.startRecording();
});
ipcMain.handle('stop-recording', async () => {
console.log('stop-recording called');
return await recorder.stopRecording();
});
ipcMain.handle('execute-basic-code', async (_, code: string) => {
console.log('execute-basic-code called with:', code);
await player.executeBasicCode(code);
});
ipcMain.handle('check-microphone-permission', async () => {
console.log('check-microphone-permission called');
if (process.platform === 'darwin') {
const status = await systemPreferences.getMediaAccessStatus('microphone');
if (status !== 'granted') {
const success = await systemPreferences.askForMediaAccess('microphone');
return success;
}
return true;
}
return true; // On Windows/Linux, permissions are handled by the OS
});
// Enable required permissions
app.commandLine.appendSwitch('enable-speech-dispatcher');
// Register cleanup on app quit
app.on('will-quit', cleanupAudioCapture);
// Function to get the focused window or first available window
function getFocusedWindow(): BrowserWindow | null {
const focusedWindow = BrowserWindow.getFocusedWindow();
if (focusedWindow) return focusedWindow;
const windows = BrowserWindow.getAllWindows();
return windows.length > 0 ? windows[0] : null;
}
// Function to safely send to window
function sendToWindow(channel: string, ...args: any[]) {
const window = getFocusedWindow();
if (window && !window.isDestroyed()) {
window.webContents.send(channel, ...args);
}
}
async function startMicrophoneCapture(window: BrowserWindow): Promise<void> {
console.log('Starting microphone capture...');
try {
// Request microphone access
//@ts-ignore
const stream = await window.myApi.startMicrophone();
audioCapture.audioStream = stream;
// Set up audio analysis
const audioContext = new ((window as any).AudioContext || (window as any).webkitAudioContext)();
const sourceNode = audioContext.createMediaStreamSource(stream);
audioCapture.analyserNode = audioContext.createAnalyser();
audioCapture.analyserNode.fftSize = 2048;
sourceNode.connect(audioCapture.analyserNode);
audioCapture.audioData = new Uint8Array(audioCapture.analyserNode.frequencyBinCount);
// Set up MediaRecorder
audioCapture.mediaRecorder = new MediaRecorder(stream, {
mimeType: 'audio/webm;codecs=opus'
});
// Handle audio data
audioCapture.mediaRecorder.ondataavailable = (event: BlobEvent) => {
if (event.data.size > 0 && !window.isDestroyed()) {
// Convert blob to buffer and send to renderer
const reader = new FileReader();
reader.onloadend = () => {
const buffer = Buffer.from(reader.result as ArrayBuffer);
window.webContents.send('audio-chunk', buffer);
};
reader.readAsArrayBuffer(event.data);
}
};
// Start recording
audioCapture.mediaRecorder.start(1000); // Capture in 1-second chunks
audioCapture.isCapturing = true;
// Start audio level monitoring
monitorAudioLevels(window);
console.log('Microphone capture started successfully');
} catch (error) {
console.error('Failed to start microphone capture:', error);
throw error;
}
}
function monitorAudioLevels(window: BrowserWindow) {
if (!audioCapture.isCapturing || !audioCapture.analyserNode || !audioCapture.audioData || window.isDestroyed()) {
return;
}
// Get audio level data
audioCapture.analyserNode.getByteFrequencyData(audioCapture.audioData);
// Calculate average volume level (0-1)
const average = audioCapture.audioData.reduce((acc, value) => acc + value, 0) /
audioCapture.audioData.length /
255;
// Send level to renderer
if (!window.isDestroyed()) {
window.webContents.send('audio-level', average);
}
// Continue monitoring
requestAnimationFrame(() => monitorAudioLevels(window));
}
function stopMicrophoneCapture(window: BrowserWindow) {
console.log('Stopping microphone capture...');
try {
if (audioCapture.mediaRecorder && audioCapture.mediaRecorder.state !== 'inactive') {
audioCapture.mediaRecorder.stop();
}
if (audioCapture.audioStream) {
audioCapture.audioStream.getTracks().forEach(track => track.stop());
}
if (audioCapture.analyserNode) {
audioCapture.analyserNode.disconnect();
}
audioCapture.isCapturing = false;
audioCapture.mediaRecorder = null;
audioCapture.audioStream = null;
audioCapture.analyserNode = null;
audioCapture.audioData = null;
if (!window.isDestroyed()) {
window.webContents.send('microphone-stopped');
}
console.log('Microphone capture stopped successfully');
} catch (error) {
console.error('Failed to stop microphone capture:', error);
throw error;
}
}
// Error handler for audio processing
function handleAudioError(error: Error, window: BrowserWindow): void {
console.error('Audio processing error:', error);
stopMicrophoneCapture(window);
// Notify renderer of error if window still exists
if (!window.isDestroyed()) {
window.webContents.send('audio-error', error.message);
}
}
// Clean up resources when app is closing
export function cleanupAudioCapture(): void {
const window = getFocusedWindow();
if (window) {
stopMicrophoneCapture(window);
}
}
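On macOS the 'check-microphone-permission' handler prompts through systemPreferences; on Windows and Linux it returns true and defers to the OS. A sketch of how a renderer would presumably gate capture on it (the wrapper function is illustrative, not part of this commit):

import { ipcRenderer } from 'electron';

async function ensureMicrophoneAndStart(): Promise<boolean> {
    const granted: boolean = await ipcRenderer.invoke('check-microphone-permission');
    if (granted) {
        await ipcRenderer.invoke('start-microphone-capture');
    }
    return granted;
}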

View file

@@ -1,7 +1,20 @@
const { ipcRenderer } = require('electron');
const { contextBridge } = require('electron');
const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
//@ts-nocheck
(window as any).myApi = {
startMicrophone: () => {
if (navigator.mediaDevices) {
return navigator.mediaDevices.getUserMedia({ audio: true });
} else {
console.error("MediaDevices API not supported");
}
},
sendMessage: (message: any) => {
console.log('[preload] sendMessage called with:', message);
return ipcRenderer.send('message-from-renderer', message);
@@ -11,3 +24,4 @@ const { ipcRenderer } = require('electron');
return ipcRenderer.on('message-from-main', (event, arg) => callback(arg));
},
};
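contextBridge is required at the top of this preload but never used; with contextIsolation disabled, assigning to window works directly. If isolation were re-enabled, a sketch of the equivalent bridge would be (note a MediaStream is not serializable across the bridge, so startMicrophone would have to stay renderer-side):

import { contextBridge, ipcRenderer } from 'electron';

// Sketch only: mirrors the messaging half of window.myApi under
// contextIsolation: true. Channel names are taken from this preload.
contextBridge.exposeInMainWorld('myApi', {
    sendMessage: (message: unknown) => ipcRenderer.send('message-from-renderer', message),
    onMessage: (callback: (arg: unknown) => void) =>
        ipcRenderer.on('message-from-main', (_event, arg) => callback(arg)),
});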

View file

@@ -2,7 +2,7 @@
<html>
<head>
<meta charset="UTF-8">
<title>BotDesktop</title>
<title>General Bots Desktop</title>
<script>var global = global || window;</script>
<script src="https://cdn.tailwindcss.com"></script>
</head>

View file

@@ -56,7 +56,7 @@ export class OpenAIService {
role: 'system',
content: `You are an AI that analyzes screenshots and voice commands to determine user intentions for automation.
You should identify UI elements and return specific actions in JSON format.
Focus on the area near the cursor position when relevant.`
Focus on the area near the field ${context.identifier}.`
},
{
role: 'user',

View file

@@ -1,9 +1,20 @@
import { ipcMain } from 'electron';
import { AutomationEvent, ScreenAnalysis } from './types';
import { OpenAIService } from './openai.service';
import { ipcRenderer, ipcMain } from 'electron';
import { AutomationEvent, ScreenAnalysis, WhisperResponse } from '../services/types';
import { OpenAIService } from '../services/openai.service';
import * as path from 'path';
import * as fs from 'fs';
interface EventGroup {
narration: string;
events: AutomationEvent[];
screenshot: string;
timestamp: number;
}
export class PlayerService {
private openAIService: OpenAIService;
private currentScreenshot: string = '';
private isPlaying: boolean = false;
constructor() {
console.log('[PlayerService] Initializing');
@@ -12,31 +23,60 @@ export class PlayerService {
async executeBasicCode(code: string) {
console.log('[PlayerService] executeBasicCode called with:', code);
this.isPlaying = true;
const lines = code.split('\n');
for (const line of lines) {
if (line.trim().startsWith('REM') || line.trim() === '') continue;
const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/);
if (!match) continue;
try {
for (const line of lines) {
if (!this.isPlaying) break;
if (line.trim().startsWith('REM') || line.trim() === '') continue;
const match = line.match(/^\d+\s+(\w+)\s+"([^"]+)"(?:\s+"([^"]+)")?/);
if (!match) continue;
const [_, command, identifier, value] = match;
console.log('[PlayerService] Executing command:', { command, identifier, value });
await this.executeCommand(command, identifier, value);
await new Promise(resolve => setTimeout(resolve, 500));
const [_, command, identifier, value] = match;
console.log('[PlayerService] Executing command:', { command, identifier, value });
await this.captureAndAnalyzeScreen();
await this.executeCommand(command, identifier, value);
await new Promise(resolve => setTimeout(resolve, 500));
}
} catch (error) {
console.error('[PlayerService] Execution error:', error);
this.isPlaying = false;
throw error;
}
}
private async captureAndAnalyzeScreen() {
console.log('[PlayerService] captureAndAnalyzeScreen called');
const sources = await ipcRenderer.invoke('get-screenshot');
this.currentScreenshot = sources[0].thumbnail;
}
private async executeCommand(command: string, identifier: string, value?: string) {
console.log('[PlayerService] executeCommand called with:', { command, identifier, value });
const screenshotPath = await this.captureScreen();
console.log('[PlayerService] Screen captured at:', screenshotPath);
let element = await this.openAIService.analyzeScreenWithContext({
    screenshot: this.currentScreenshot,
    transcription: '',
    identifier, cursorPosition: null
});
//@ts-nocheck
const analysis = await this.openAIService.analyzeScreen(screenshotPath);
const element = analysis.elements.find(e => e.identifier === identifier);
if (!element) throw new Error(`Element not found: ${identifier}`);
if (!element) {
console.warn(`[PlayerService] Element not found: ${identifier}, retrying with fresh analysis`);
await this.captureAndAnalyzeScreen();
const newElement = await this.openAIService.analyzeScreenWithContext({
screenshot: this.currentScreenshot,
transcription: '',
cursorPosition: await ipcRenderer.invoke('get-cursor-position'),
identifier
});
if (!newElement) throw new Error(`Element not found after retry: ${identifier}`);
element = newElement;
}
const centerX = element.bounds.x + element.bounds.width/2;
const centerY = element.bounds.y + element.bounds.height/2;
@@ -44,49 +84,22 @@ export class PlayerService {
switch (command) {
case 'CLICK':
console.log('[PlayerService] Simulating click at:', { centerX, centerY });
await this.simulateClick(centerX, centerY);
await ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY });
break;
case 'TYPE':
console.log('[PlayerService] Simulating type:', { centerX, centerY, value });
await this.simulateClick(centerX, centerY);
await this.simulateTyping(value || '');
await ipcRenderer.invoke('simulate-click', { x: centerX, y: centerY });
await ipcRenderer.invoke('simulate-type', { text: value || '' });
break;
case 'MOVE':
console.log('[PlayerService] Simulating move:', { centerX, centerY });
await ipcRenderer.invoke('simulate-move', { x: centerX, y: centerY });
break;
}
}
private async captureScreen(): Promise<string> {
console.log('[PlayerService] captureScreen called');
return new Promise((resolve, reject) => {
ipcMain.once('screen-captured', (_, screenshotPath) => {
console.log('[PlayerService] Screen captured event received:', screenshotPath);
resolve(screenshotPath);
});
ipcMain.emit('capture-screen');
});
public stop() {
console.log('[PlayerService] Stopping playback');
this.isPlaying = false;
}
private async simulateClick(x: number, y: number): Promise<void> {
console.log('[PlayerService] simulateClick called with:', { x, y });
return new Promise((resolve) => {
ipcMain.once('click-completed', () => {
console.log('[PlayerService] Click completed');
resolve();
});
ipcMain.emit('simulate-click', { x, y });
});
}
private async simulateTyping(text: string): Promise<void> {
console.log('[PlayerService] simulateTyping called with:', text);
return new Promise((resolve) => {
ipcMain.once('typing-completed', () => {
console.log('[PlayerService] Typing completed');
resolve();
});
ipcMain.emit('simulate-typing', { text });
});
}
}
}
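stop() only flips isPlaying; the executeBasicCode loop checks it before each command and exits on the next iteration. The commit does not wire it to IPC, but a hypothetical handler in the main process could look like:

// Hypothetical channel; 'stop-playback' is not registered anywhere in this diff.
ipcMain.handle('stop-playback', async () => {
    player.stop();
});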

View file

@@ -1,24 +1,32 @@
import { ipcRenderer } from 'electron';
import { AutomationEvent, ScreenAnalysis, WhisperResponse } from '../services/types';
import { AutomationEvent, EventGroup, ScreenAnalysis, WhisperResponse } from '../services/types';
import { OpenAIService } from '../services/openai.service';
import * as path from 'path';
import * as fs from 'fs';
export class RecorderService {
private events: AutomationEvent[] = [];
private eventGroups: EventGroup[] = [];
private currentEvents: AutomationEvent[] = [];
private recording: boolean = false;
private openAIService: OpenAIService;
private currentScreenshot: string = '';
private lastTranscription: string = '';
private currentAudioFile: string = '';
private audioBuffer: Buffer[] = [];
private isListeningToMicrophone: boolean = false;
private silenceTimer: NodeJS.Timeout | null = null;
private isProcessingAudio: boolean = false;
private tempDir: string;
private SILENCE_THRESHOLD = 0.01;
private SILENCE_DURATION = 1500; // 1.5 seconds of silence to trigger processing
private MIN_AUDIO_DURATION = 500; // Minimum audio duration to process
constructor() {
console.log('RecorderService.constructor()');
this.openAIService = new OpenAIService();
this.tempDir = path.join(process.cwd(), 'temp_recordings');
this.ensureTempDirectory();
}
private ensureTempDirectory() {
if (!fs.existsSync(this.tempDir)) {
fs.mkdirSync(this.tempDir, { recursive: true });
}
@@ -28,10 +36,11 @@ export class RecorderService {
console.log('RecorderService.startRecording()');
try {
this.recording = true;
this.events = [];
await this.setupAudioRecording();
await this.requestScreenshot();
ipcRenderer.on('keyboard-event', this.keyboardHandleEvent);
this.eventGroups = [];
this.currentEvents = [];
await this.startMicrophoneCapture();
await this.captureInitialScreenshot();
this.setupEventListeners();
} catch (error) {
console.error('RecorderService.startRecording() error:', error);
this.recording = false;
@@ -39,127 +48,148 @@ export class RecorderService {
}
}
private async setupAudioRecording() {
console.log('RecorderService.setupAudioRecording()');
private async startMicrophoneCapture() {
console.log('RecorderService.startMicrophoneCapture()');
try {
this.isListeningToMicrophone = true;
ipcRenderer.on('audio-level', this.handleAudioLevel);
ipcRenderer.on('audio-chunk', this.handleAudioChunk);
await ipcRenderer.invoke('start-microphone-capture');
} catch (error) {
console.error('RecorderService.setupAudioRecording() error:', error);
throw new Error(`Failed to setup audio recording: ${error.message}`);
console.error('Failed to start microphone capture:', error);
throw new Error(`Microphone initialization failed: ${error.message}`);
}
}
private handleAudioLevel = async (_: any, level: number) => {
console.log('RecorderService.handleAudioLevel()', { level });
if (!this.recording) return;
public handleAudioLevel = (_: any, level: number) => {
if (!this.recording || !this.isListeningToMicrophone) return;
const SILENCE_THRESHOLD = 0.01;
const SILENCE_DURATION = 1000;
if (level < SILENCE_THRESHOLD) {
if (!this.silenceTimer && !this.isProcessingAudio) {
console.log('RecorderService.handleAudioLevel() - Setting silence timer');
if (level < this.SILENCE_THRESHOLD) {
if (!this.silenceTimer && !this.isProcessingAudio && this.audioBuffer.length > 0) {
this.silenceTimer = setTimeout(async () => {
if (this.recording) {
await this.processSilence();
await this.processCapturedAudio();
}
}, SILENCE_DURATION);
}, this.SILENCE_DURATION);
}
} else {
if (this.silenceTimer) {
console.log('RecorderService.handleAudioLevel() - Clearing silence timer');
clearTimeout(this.silenceTimer);
this.silenceTimer = null;
}
}
}
private handleAudioChunk = async (_: any, chunk: Buffer) => {
console.log('RecorderService.handleAudioChunk()', { chunkSize: chunk.length });
if (!this.recording) return;
public handleAudioChunk = (_: any, chunk: Buffer) => {
if (!this.recording || !this.isListeningToMicrophone) return;
this.audioBuffer.push(chunk);
}
private async processCapturedAudio() {
if (this.isProcessingAudio || this.audioBuffer.length === 0) return;
this.isProcessingAudio = true;
const combinedBuffer = Buffer.concat(this.audioBuffer);
this.audioBuffer = []; // Clear the buffer
try {
const audioFilePath = path.join(this.tempDir, `audio-${Date.now()}.wav`);
fs.writeFileSync(audioFilePath, chunk);
fs.writeFileSync(audioFilePath, combinedBuffer);
if (this.silenceTimer) {
clearTimeout(this.silenceTimer);
this.silenceTimer = null;
await this.processAudioFile(audioFilePath);
}
} catch (error) {
console.error('RecorderService.handleAudioChunk() error:', error);
}
};
const transcription = await this.openAIService.transcribeAudio(
new Blob([combinedBuffer], { type: 'audio/wav' })
);
private async processSilence() {
console.log('RecorderService.processSilence()');
if (this.isProcessingAudio) return;
this.isProcessingAudio = true;
try {
const audioFilePath = await ipcRenderer.invoke('save-audio-chunk');
console.log('RecorderService.processSilence() - Audio saved to:', audioFilePath);
if (audioFilePath) {
this.currentAudioFile = audioFilePath;
await this.processAudioFile(audioFilePath);
await this.requestScreenshot();
if (transcription.text.trim()) {
await this.processNarrationWithEvents(transcription.text);
}
fs.unlinkSync(audioFilePath);
} catch (error) {
console.error('RecorderService.processSilence() error:', error);
console.error('Audio processing error:', error);
} finally {
this.isProcessingAudio = false;
}
}
private async processAudioFile(audioFilePath: string) {
console.log('RecorderService.processAudioFile()', { audioFilePath });
try {
const audioBuffer = fs.readFileSync(audioFilePath);
const transcription = await this.openAIService.transcribeAudio(
new Blob([audioBuffer], { type: 'audio/wav' })
);
console.log('RecorderService.processAudioFile() - Transcription:', transcription);
private async processNarrationWithEvents(narration: string) {
if (this.currentEvents.length === 0) return;
if (transcription.text.trim()) {
await this.processTranscription(transcription);
}
const eventGroup: EventGroup = {
narration,
events: [...this.currentEvents],
screenshot: this.currentScreenshot,
timestamp: Date.now()
};
fs.unlinkSync(audioFilePath);
} catch (error) {
console.error('RecorderService.processAudioFile() error:', error);
}
this.eventGroups.push(eventGroup);
this.currentEvents = []; // Clear current events for next group
await this.captureInitialScreenshot(); // Get fresh screenshot for next group
}
private async processTranscription(transcription: WhisperResponse) {
console.log('RecorderService.processTranscription()', { transcription });
this.lastTranscription = transcription.text;
const cursorPosition = await ipcRenderer.invoke('get-cursor-position');
console.log('RecorderService.processTranscription() - Cursor position:', cursorPosition);
const analysis = await this.openAIService.analyzeScreenWithContext({
screenshot: this.currentScreenshot,
transcription: this.lastTranscription,
cursorPosition
});
console.log('RecorderService.processTranscription() - Screen analysis:', analysis);
private setupEventListeners() {
ipcRenderer.on('keyboard-event', this.handleKeyboardEvent);
ipcRenderer.on('mouse-event', this.handleMouseEvent);
}
if (analysis) {
this.events.push({
type: analysis.type,
identifier: analysis.identifier,
value: analysis.value,
private async captureInitialScreenshot() {
const sources = await ipcRenderer.invoke('get-screenshot');
this.currentScreenshot = sources[0].thumbnail;
}
public handleKeyboardEvent = async (_: any, event: KeyboardEvent) => {
if (!this.recording) return;
this.currentEvents.push({
type: 'type',
identifier: event.key,
value: event.key,
timestamp: Date.now(),
narration: ''
});
}
public handleMouseEvent = async (_: any, event: MouseEvent) => {
if (!this.recording) return;
const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot);
const element = this.findElementAtPosition(analysis, event.clientX, event.clientY);
if (element) {
this.currentEvents.push({
type: 'click',
identifier: element.identifier,
timestamp: Date.now(),
narration: this.lastTranscription
narration: ''
});
}
}
private findElementAtPosition(analysis: ScreenAnalysis, x: number, y: number) {
return analysis.elements.find(element => {
const bounds = element.bounds;
return x >= bounds.x &&
x <= bounds.x + bounds.width &&
y >= bounds.y &&
y <= bounds.y + bounds.height;
});
}
public async stopRecording(): Promise<string> {
console.log('RecorderService.stopRecording()');
// Process any remaining audio
if (this.audioBuffer.length > 0) {
await this.processCapturedAudio();
}
this.cleanup();
return this.generateBasicCode();
}
private cleanup() {
this.recording = false;
this.isListeningToMicrophone = false;
if (this.silenceTimer) {
clearTimeout(this.silenceTimer);
@@ -168,106 +198,40 @@ export class RecorderService {
ipcRenderer.removeListener('audio-level', this.handleAudioLevel);
ipcRenderer.removeListener('audio-chunk', this.handleAudioChunk);
ipcRenderer.removeListener('keyboard-event', this.keyboardHandleEvent);
ipcRenderer.removeListener('keyboard-event', this.handleKeyboardEvent);
ipcRenderer.removeListener('mouse-event', this.handleMouseEvent);
if (this.currentAudioFile && fs.existsSync(this.currentAudioFile)) {
fs.unlinkSync(this.currentAudioFile);
}
const code = this.generateBasicCode();
console.log('RecorderService.stopRecording() - Generated code:', code);
return code;
}
private async requestScreenshot() {
console.log('RecorderService.requestScreenshot()');
try {
const sources = await ipcRenderer.invoke('get-screenshot');
console.log('RecorderService.requestScreenshot() - Sources:', sources);
const screenSource = sources[0];
await this.screenshotHandleEvent(null, screenSource.thumbnail);
} catch (error) {
console.error('RecorderService.requestScreenshot() error:', error);
}
}
public async screenshotHandleEvent(_: any, screenshot: string) {
console.log('RecorderService.screenshotHandleEvent()', { screenshot });
this.currentScreenshot = screenshot;
}
public async keyboardHandleEvent(_: any, event: KeyboardEvent) {
console.log('RecorderService.keyboardHandleEvent()', { key: event.key });
if (!this.recording) return;
this.events.push({
type: 'type',
identifier: event.key,
timestamp: Date.now(),
narration: this.lastTranscription
});
}
public async mouseHandleEvent(_: any, event: any) {
console.log('RecorderService.mouseHandleEvent()', { x: event.x, y: event.y });
if (!this.recording) return;
const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot);
console.log('RecorderService.mouseHandleEvent() - Screen analysis:', analysis);
const element = this.findElementAtPosition(analysis, event.x, event.y);
console.log('RecorderService.mouseHandleEvent() - Found element:', element);
if (element) {
this.events.push({
type: 'click',
identifier: element.identifier,
timestamp: Date.now(),
narration: this.lastTranscription
});
}
}
private findElementAtPosition(analysis: ScreenAnalysis, x: number, y: number) {
console.log('RecorderService.findElementAtPosition()', { x, y, analysisElementsCount: analysis.elements.length });
return analysis.elements.find((element) => {
const bounds = element.bounds;
const found = x >= bounds.x &&
x <= bounds.x + bounds.width &&
y >= bounds.y &&
y <= bounds.y + bounds.height;
if (found) {
console.log('RecorderService.findElementAtPosition() - Found matching element:', element);
}
return found;
// Cleanup temp directory
fs.readdirSync(this.tempDir).forEach(file => {
fs.unlinkSync(path.join(this.tempDir, file));
});
}
private generateBasicCode(): string {
console.log('RecorderService.generateBasicCode()', { eventsCount: this.events.length });
let basicCode = '10 REM BotDesktop Automation Script\n';
let lineNumber = 20;
for (const event of this.events) {
basicCode += `${lineNumber} REM ${event.narration}\n`;
this.eventGroups.forEach(group => {
basicCode += `${lineNumber} REM ${group.narration}\n`;
lineNumber += 10;
switch (event.type) {
case 'click':
basicCode += `${lineNumber} CLICK "${event.identifier}"\n`;
break;
case 'type':
basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`;
break;
case 'move':
basicCode += `${lineNumber} MOVE "${event.identifier}"\n`;
break;
}
lineNumber += 10;
}
group.events.forEach(event => {
switch (event.type) {
case 'click':
basicCode += `${lineNumber} CLICK "${event.identifier}"\n`;
break;
case 'type':
basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`;
break;
case 'move':
basicCode += `${lineNumber} MOVE "${event.identifier}"\n`;
break;
}
lineNumber += 10;
});
});
basicCode += `${lineNumber} END\n`;
console.log('RecorderService.generateBasicCode() - Generated code:', basicCode);
return basicCode;
}
}
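Each silence-delimited narration becomes one EventGroup pairing the transcription with the events captured since the previous pause, plus the screenshot taken at the start of the group. A hypothetical group (values illustrative):

const example: EventGroup = {
    narration: 'Open the settings page',
    events: [
        { type: 'click', identifier: 'settings-button', timestamp: 1730045225000, narration: '' }
    ],
    screenshot: '<base64 PNG thumbnail>',
    timestamp: 1730045226500
};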

View file

@@ -1,3 +1,22 @@
export interface PlaybackEvent {
command: string;
args: string[];
description?: string;
}
export interface ScreenElement {
identifier: string;
bounds: {
x: number;
y: number;
width: number;
height: number;
};
windowName: string;
value?: string;
}
export interface AutomationAction {
type: 'click' | 'type' | 'move';
identifier: string;
@@ -12,6 +31,12 @@
}
export interface EventGroup {
narration: string;
events: AutomationEvent[];
screenshot: string;
timestamp: number;
}
export interface AutomationEvent {
type: 'click' | 'type' | 'move';
@@ -30,6 +55,7 @@ export interface ScreenContext {
screenshot: string;
transcription: string;
cursorPosition: { x: number, y: number };
identifier: string;
}
export interface ScreenAnalysis {