new(all): Initial import.

parent e6d2ffa35a
commit 5ebde5b646

15 changed files with 9645 additions and 151 deletions
1  .gitignore  vendored

@@ -1,2 +1,3 @@
 node_modules
 .env
+output.txt
4  .vscode/launch.json  vendored

@@ -5,12 +5,12 @@
     "name": "Electron: Main",
     "type": "node",
     "request": "launch",
-    "program": "${workspaceFolder}/node_modules/electron/dist/electron.js",
+    "sourceMaps": true,
     "args": ["${workspaceFolder}/dist/main/main.js"],
     "outFiles": ["${workspaceFolder}/dist/**/*.js"],
     "cwd": "${workspaceFolder}",
-    "sourceMaps": true,
     "protocol": "inspector",
+    "console": "integratedTerminal",
     "windows": {
       "runtimeExecutable": "${workspaceFolder}/node_modules/.bin/electron.cmd"
     },
18  dist/main/main.js  vendored

@@ -23,8 +23,11 @@ var __importStar = (this && this.__importStar) || function (mod) {
     return result;
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+require('dotenv').config();
 const electron_1 = require("electron");
 const path = __importStar(require("path"));
+// In main.ts
+const electron_2 = require("electron");
 const recorder_service_1 = require("../services/recorder.service");
 const player_service_1 = require("../services/player.service");
 const recorder = new recorder_service_1.RecorderService();
@@ -70,3 +73,18 @@ electron_1.ipcMain.handle('stop-recording', async () => {
 electron_1.ipcMain.handle('execute-basic-code', async (_, code) => {
     await player.executeBasicCode(code);
 });
+// Add microphone permission check for macOS
+electron_1.ipcMain.handle('check-microphone-permission', async () => {
+    if (process.platform === 'darwin') {
+        const status = await electron_2.systemPreferences.getMediaAccessStatus('microphone');
+        if (status !== 'granted') {
+            const success = await electron_2.systemPreferences.askForMediaAccess('microphone');
+            return success;
+        }
+        return true;
+    }
+    // On Windows/Linux, permissions are handled by the OS
+    return true;
+});
+// Enable required permissions
+electron_1.app.commandLine.appendSwitch('enable-speech-dispatcher');
1  dist/preload/preload.js  vendored

@@ -1 +0,0 @@
-// Preload script goes here
164  dist/services/openai.service.js  vendored

@@ -1,56 +1,150 @@
 "use strict";
-var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
-    if (k2 === undefined) k2 = k;
-    var desc = Object.getOwnPropertyDescriptor(m, k);
-    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
-        desc = { enumerable: true, get: function() { return m[k]; } };
-    }
-    Object.defineProperty(o, k2, desc);
-}) : (function(o, m, k, k2) {
-    if (k2 === undefined) k2 = k;
-    o[k2] = m[k];
-}));
-var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
-    Object.defineProperty(o, "default", { enumerable: true, value: v });
-}) : function(o, v) {
-    o["default"] = v;
-});
-var __importStar = (this && this.__importStar) || function (mod) {
-    if (mod && mod.__esModule) return mod;
-    var result = {};
-    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
-    __setModuleDefault(result, mod);
-    return result;
-};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.OpenAIService = void 0;
 const openai_1 = require("openai");
-const fs = __importStar(require("fs"));
+const { Readable } = require('stream');
 class OpenAIService {
     constructor() {
-        this.client = new openai_1.AzureOpenAI({ dangerouslyAllowBrowser: true,
+        this.client = new openai_1.AzureOpenAI({
+            dangerouslyAllowBrowser: true,
             endpoint: process.env.AZURE_OPEN_AI_ENDPOINT || '',
-            deployment: process.env.AZURE_OPEN_AI_IMAGE_MODEL || '',
-            apiVersion: process.env.OPENAI_API_VERSION || '',
+            apiVersion: process.env.OPENAI_API_VERSION || '2024-02-15-preview',
             apiKey: process.env.AZURE_OPEN_AI_KEY || ''
         });
     }
-    async analyzeScreen(imagePath) {
-        const imageBuffer = fs.readFileSync(imagePath);
-        const base64Image = imageBuffer.toString('base64');
+    async transcribeAudio(audioBlob) {
+        try {
+            // Convert Blob to ArrayBuffer
+            const arrayBuffer = await audioBlob.arrayBuffer();
+            // Convert Buffer to a Readable stream
+            const buffer = Buffer.from(arrayBuffer);
+            const stream = new Readable();
+            stream.push(buffer);
+            stream.push(null); // Signal the end of the stream
+            const response = await this.client.audio.transcriptions.create({
+                file: stream,
+                model: process.env.AZURE_OPEN_AI_WHISPER_MODEL || 'whisper-1',
+                language: 'en',
+                response_format: 'verbose_json'
+            });
+            return {
+                text: response.text,
+                //@ts-ignore
+                segments: response.segments?.map(seg => ({
+                    text: seg.text,
+                    start: seg.start,
+                    end: seg.end
+                })) || []
+            };
+        }
+        catch (error) {
+            console.error('Error in transcribeAudio:', error);
+            throw new Error('Failed to transcribe audio');
+        }
+    }
+    async analyzeScreenWithContext(context) {
+        try {
             const response = await this.client.chat.completions.create({
-            model: process.env.AZURE_OPEN_AI_LLM_MODEL || '',
+                model: process.env.AZURE_OPEN_AI_VISION_MODEL || '',
                 messages: [
+                    {
+                        role: 'system',
+                        content: `You are an AI that analyzes screenshots and voice commands to determine user intentions for automation.
+          You should identify UI elements and return specific actions in JSON format.
+          Focus on the area near the cursor position when relevant.`
+                    },
                     {
                         role: 'user',
                         content: [
-                { type: 'text', text: 'Analyze this screenshot and identify all interactive elements (buttons, text fields, etc). Return their locations and identifiers.' },
-                { type: 'image_url', image_url: { url: `data:image/png;base64,${base64Image}` } }
-            ],
+                            {
+                                type: 'text',
+                                text: `Analyze this screenshot with the following context:
+          Voice Command: "${context.transcription}"
+          Cursor Position: x=${context.cursorPosition.x}, y=${context.cursorPosition.y}
+
+          Identify the most likely action based on the voice command and cursor position.
+          Return in format: {
+            "type": "click|type|move",
+            "identifier": "element-id or descriptive name",
+            "value": "text to type (for type actions)",
+            "confidence": 0-1,
+            "bounds": {"x": number, "y": number, "width": number, "height": number}
+          }`
+                            },
                         },
+                            {
+                                type: 'image_url',
+                                image_url: {
+                                    url: `data:image/png;base64,${context.screenshot}`
+                                }
+                            }
+                        ]
+                    }
                 ],
+                max_tokens: 500,
+                temperature: 0.3
             });
-        return JSON.parse(response.choices[0].message.content || '{}');
+            const result = JSON.parse(response.choices[0].message.content || '{}');
+            return result;
+        }
+        catch (error) {
+            console.error('Error in analyzeScreenWithContext:', error);
+            throw new Error('Failed to analyze screen context');
+        }
+    }
+    async analyzeScreen(screenshot) {
+        try {
+            const response = await this.client.chat.completions.create({
+                model: process.env.AZURE_OPEN_AI_VISION_MODEL || '',
+                messages: [
+                    {
+                        role: 'system',
+                        content: 'You are an AI that analyzes screenshots to identify interactive UI elements and their properties.'
+                    },
+                    {
+                        role: 'user',
+                        content: [
+                            {
+                                type: 'text',
+                                text: `Analyze this screenshot and identify all interactive elements (buttons, text fields, dropdowns, etc).
+          For each element, provide:
+          - Type of element
+          - Identifier or descriptive name
+          - Location and size
+          - Any visible text or labels
+          - State (focused, disabled, etc)
+
+          Return in format: {
+            "elements": [{
+              "type": "button|input|dropdown|etc",
+              "identifier": "element-id or descriptive name",
+              "bounds": {"x": number, "y": number, "width": number, "height": number},
+              "text": "visible text",
+              "state": {"focused": boolean, "disabled": boolean}
+            }]
+          }`
+                            },
+                            {
+                                type: 'image_url',
+                                image_url: {
+                                    url: `data:image/png;base64,${screenshot}`
+                                }
+                            }
+                        ]
+                    }
+                ],
+                max_tokens: 1000,
+                temperature: 0.3
+            });
+            const result = JSON.parse(response.choices[0].message.content || '{}');
+            return {
+                elements: result.elements || [],
+                timestamp: Date.now()
+            };
+        }
+        catch (error) {
+            console.error('Error in analyzeScreen:', error);
+            throw new Error('Failed to analyze screen');
+        }
     }
 }
 exports.OpenAIService = OpenAIService;
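For reference, transcribeAudio() maps Whisper's verbose_json response down to a plain object with the full text plus timed segments. A minimal sketch of the resolved shape (hypothetical values):

    // Shape of the object transcribeAudio() resolves to.
    const transcription = {
        text: 'click the save button',
        segments: [
            { text: 'click the save button', start: 0.0, end: 1.4 }
        ]
    };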
222  dist/services/recorder.service.js  vendored

@@ -1,39 +1,198 @@
 "use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+        desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.RecorderService = void 0;
 const electron_1 = require("electron");
-const dotenv_1 = __importDefault(require("dotenv"));
-dotenv_1.default.config();
-const openai_service_1 = require("./openai.service");
+const openai_service_1 = require("../services/openai.service");
+const _ = require('lodash');
+const path = __importStar(require("path"));
+const fs = __importStar(require("fs"));
 class RecorderService {
     constructor() {
         this.events = [];
         this.recording = false;
         this.currentScreenshot = '';
+        this.lastTranscription = '';
+        this.recordingProcess = null;
+        this.currentAudioFile = '';
+        this.silenceTimer = null;
+        this.isProcessingAudio = false;
+        this.handleAudioLevel = _.debounce(async (_, level) => {
+            if (!this.recording)
+                return;
+            const SILENCE_THRESHOLD = 0.01;
+            const SILENCE_DURATION = 1000;
+            if (level < SILENCE_THRESHOLD) {
+                if (!this.silenceTimer && !this.isProcessingAudio) {
+                    this.silenceTimer = setTimeout(async () => {
+                        if (this.recording) {
+                            await this.processSilence();
+                        }
+                    }, SILENCE_DURATION);
+                }
+            }
+            else {
+                if (this.silenceTimer) {
+                    clearTimeout(this.silenceTimer);
+                    this.silenceTimer = null;
+                }
+            }
+        }, 100);
+        this.handleAudioChunk = async (_, chunk) => {
+            if (!this.recording)
+                return;
+            try {
+                const audioFilePath = path.join(this.tempDir, `audio-${Date.now()}.wav`);
+                fs.writeFileSync(audioFilePath, chunk);
+                if (this.silenceTimer) {
+                    clearTimeout(this.silenceTimer);
+                    this.silenceTimer = null;
+                    await this.processAudioFile(audioFilePath);
+                }
+            }
+            catch (error) {
+                console.error('Error handling audio chunk:', error);
+            }
+        };
         this.openAIService = new openai_service_1.OpenAIService();
+        this.tempDir = path.join(process.cwd(), 'temp_recordings');
+        if (!fs.existsSync(this.tempDir)) {
+            fs.mkdirSync(this.tempDir, { recursive: true });
+        }
     }
     async startRecording() {
+        try {
             this.recording = true;
             this.events = [];
-        this.requestScreenshot();
+            await this.setupAudioRecording();
+            await this.requestScreenshot();
+            electron_1.ipcRenderer.on('keyboard-event', this.keyboardHandleEvent); // Listen for keyboard events
         }
-    stopRecording() {
+        catch (error) {
+            console.error('Failed to start recording:', error);
             this.recording = false;
-        return this.generateBasicCode();
+            throw error;
         }
-    requestScreenshot() {
-        // Notify renderer process to capture a screenshot
-        const allWebContents = electron_1.screen.getAllDisplays();
-        allWebContents.forEach((webContents) => {
-            //@ts-ignores
-            webContents.send('request-screenshot');
+    }
+    async setupAudioRecording() {
+        try {
+            this.recordingProcess = await electron_1.ipcRenderer.invoke('start-audio-recording');
+            electron_1.ipcRenderer.on('audio-level', this.handleAudioLevel);
+            electron_1.ipcRenderer.on('audio-chunk', this.handleAudioChunk);
+        }
+        catch (error) {
+            console.error('Error setting up audio recording:', error);
+            throw new Error(`Failed to setup audio recording: ${error.message}`);
+        }
+    }
+    async processSilence() {
+        if (this.isProcessingAudio)
+            return;
+        this.isProcessingAudio = true;
+        try {
+            const audioFilePath = await electron_1.ipcRenderer.invoke('save-audio-chunk');
+            if (audioFilePath) {
+                this.currentAudioFile = audioFilePath;
+                await this.processAudioFile(audioFilePath);
+                await this.requestScreenshot();
+            }
+        }
+        catch (error) {
+            console.error('Error processing silence:', error);
+        }
+        finally {
+            this.isProcessingAudio = false;
+        }
+    }
+    async processAudioFile(audioFilePath) {
+        try {
+            const audioBuffer = fs.readFileSync(audioFilePath);
+            const transcription = await this.openAIService.transcribeAudio(new Blob([audioBuffer], { type: 'audio/wav' }));
+            if (transcription.text.trim()) {
+                await this.processTranscription(transcription);
+            }
+            fs.unlinkSync(audioFilePath);
+        }
+        catch (error) {
+            console.error('Error processing audio file:', error);
+        }
+    }
+    async processTranscription(transcription) {
+        this.lastTranscription = transcription.text;
+        const analysis = await this.openAIService.analyzeScreenWithContext({
+            screenshot: this.currentScreenshot,
+            transcription: this.lastTranscription,
+            cursorPosition: await electron_1.ipcRenderer.invoke('get-cursor-position')
+        });
+        if (analysis) {
+            this.events.push({
+                type: analysis.type,
+                identifier: analysis.identifier,
+                value: analysis.value,
+                timestamp: Date.now(),
+                narration: this.lastTranscription
             });
         }
+    }
+    async stopRecording() {
+        this.recording = false;
+        if (this.silenceTimer) {
+            clearTimeout(this.silenceTimer);
+            this.silenceTimer = null;
+        }
+        await electron_1.ipcRenderer.invoke('stop-audio-recording');
+        electron_1.ipcRenderer.removeListener('audio-level', this.handleAudioLevel);
+        electron_1.ipcRenderer.removeListener('audio-chunk', this.handleAudioChunk);
+        electron_1.ipcRenderer.removeListener('keyboard-event', this.keyboardHandleEvent); // Remove keyboard listener
+        if (this.currentAudioFile && fs.existsSync(this.currentAudioFile)) {
+            fs.unlinkSync(this.currentAudioFile);
+        }
+        return this.generateBasicCode();
+    }
+    async requestScreenshot() {
+        try {
+            const sources = await electron_1.ipcRenderer.invoke('get-screenshot');
+            const screenSource = sources[0];
+            await this.screenshotHandleEvent(null, screenSource.thumbnail);
+        }
+        catch (error) {
+            console.error('Error capturing screenshot:', error);
+        }
+    }
     async screenshotHandleEvent(_, screenshot) {
-        this.currentScreenshot = screenshot; // Store the screenshot as a base64 image
+        this.currentScreenshot = screenshot;
+    }
+    async keyboardHandleEvent(_, event) {
+        if (!this.recording)
+            return;
+        this.events.push({
+            type: 'type',
+            identifier: event.key,
+            timestamp: Date.now(),
+            narration: this.lastTranscription
+        });
     }
     async mouseHandleEvent(_, event) {
         if (!this.recording)
@@ -45,44 +204,39 @@ class RecorderService {
             type: 'click',
             identifier: element.identifier,
             timestamp: Date.now(),
-        });
-    }
-    }
-    async keyboardHandleEvent(_, event) {
-        if (!this.recording)
-            return;
-        const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot);
-        const focusedElement = this.findFocusedElement(analysis);
-        if (focusedElement) {
-            this.events.push({
-                type: 'type',
-                identifier: focusedElement.identifier,
-                value: event.key,
-                timestamp: Date.now(),
+            narration: this.lastTranscription
             });
         }
     }
     findElementAtPosition(analysis, x, y) {
+        //@ts-nocheck
         return analysis.elements.find((element) => {
             const bounds = element.bounds;
-            return x >= bounds.x && x <= bounds.x + bounds.width && y >= bounds.y && y <= bounds.y + bounds.height;
+            return x >= bounds.x &&
+                x <= bounds.x + bounds.width &&
+                y >= bounds.y &&
+                y <= bounds.y + bounds.height;
         });
     }
-    findFocusedElement(analysis) {
-        //@ts-ignore
-        return analysis.elements.find((element) => element.focused);
-    }
     generateBasicCode() {
        let basicCode = '10 REM BotDesktop Automation Script\n';
        let lineNumber = 20;
        for (const event of this.events) {
+            basicCode += `${lineNumber} REM ${event.narration}\n`;
+            lineNumber += 10;
            switch (event.type) {
                case 'click':
                    basicCode += `${lineNumber} CLICK "${event.identifier}"\n`;
                    break;
+                case 'type':
+                    basicCode += `${lineNumber} TYPE "${event.identifier}"\n`;
+                    break;
                case 'type':
                    basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`;
                    break;
+                case 'move':
+                    basicCode += `${lineNumber} MOVE "${event.identifier}"\n`;
+                    break;
            }
            lineNumber += 10;
        }
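The silence detection in handleAudioLevel is worth unpacking: level samples arrive debounced to at most one per 100 ms; a level below the 0.01 threshold arms a one-second timer, and any louder sample disarms it, so processSilence() runs only after a full second of quiet. A standalone sketch of that mechanism (illustrative, not part of the commit):

    const _ = require('lodash');

    let silenceTimer: NodeJS.Timeout | null = null;
    const onLevel = _.debounce((level: number) => {
        if (level < 0.01) {
            // Quiet sample: arm the flush timer once; it fires only if no
            // loud sample arrives within the next second.
            if (!silenceTimer) {
                silenceTimer = setTimeout(() => console.log('silence: flush audio'), 1000);
            }
        } else if (silenceTimer) {
            // Speech resumed: cancel the pending flush.
            clearTimeout(silenceTimer);
            silenceTimer = null;
        }
    }, 100);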
12  gencode.sh  Executable file

@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Remove output.txt if it exists to start fresh
+rm -f output.txt
+
+# Find all .ts and .tsx files excluding node_modules, and concatenate filename and contents into output.txt
+find . -type f \( -name "*.ts" -o -name "*.tsx" \) -not -path "*/node_modules/*" | while read -r file; do
+    echo -e "\n// File: $file\n" >> output.txt
+    cat "$file" >> output.txt
+done
+
+echo "All TypeScript (.ts and .tsx) code has been combined into output.txt with filenames as headers, excluding node_modules"
8905  package-lock.json  generated

File diff suppressed because it is too large.
package.json

@@ -10,13 +10,15 @@
     "test": "vitest"
   },
   "dependencies": {
-    "dotenv": "^16.4.5",
-    "node-global-key-listener": "^0.3.0",
-    "node-mouse": "^0.0.2",
     "@types/node": "^20.0.0",
     "@types/react": "^18.0.0",
     "@types/react-dom": "^18.0.0",
+    "debounce": "^2.2.0",
+    "dotenv": "^16.4.5",
     "electron": "^28.0.0",
+    "lodash": "^4.17.21",
+    "node-global-key-listener": "^0.3.0",
+    "node-mouse": "^0.0.2",
     "openai": "^4.28.0",
     "react": "^18.2.0",
     "react-dom": "^18.2.0",
src/main/main.ts

@@ -1,6 +1,8 @@
+require('dotenv').config();
 import { app, BrowserWindow, ipcMain } from 'electron';
 import * as path from 'path';
+// In main.ts
+import { systemPreferences } from 'electron';
 import { RecorderService } from '../services/recorder.service';
 import { PlayerService } from '../services/player.service';
@@ -56,3 +58,21 @@ ipcMain.handle('stop-recording', async () => {
 ipcMain.handle('execute-basic-code', async (_, code: string) => {
     await player.executeBasicCode(code);
 });
+
+// Add microphone permission check for macOS
+ipcMain.handle('check-microphone-permission', async () => {
+    if (process.platform === 'darwin') {
+        const status = await systemPreferences.getMediaAccessStatus('microphone');
+        if (status !== 'granted') {
+            const success = await systemPreferences.askForMediaAccess('microphone');
+            return success;
+        }
+        return true;
+    }
+    // On Windows/Linux, permissions are handled by the OS
+    return true;
+});
+
+// Enable required permissions
+app.commandLine.appendSwitch('enable-speech-dispatcher');
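A renderer could call the new handler before starting a capture. This sketch is illustrative and not part of the commit; only the channel name and its return semantics come from the handler above:

    import { ipcRenderer } from 'electron';

    async function ensureMicrophoneAccess(): Promise<boolean> {
        // Resolves true when access is granted; on Windows/Linux the handler
        // always returns true and leaves permissions to the OS.
        const granted = await ipcRenderer.invoke('check-microphone-permission');
        if (!granted) {
            console.warn('Microphone access denied; voice narration will not be recorded.');
        }
        return granted;
    }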
src/preload/preload.ts

@@ -1 +0,0 @@
-// Preload script goes here
src/services/openai.service.ts

@@ -1,36 +1,155 @@
 import { AzureOpenAI } from 'openai';
 import * as fs from 'fs';
-import { ScreenAnalysis } from './types';
+import { ScreenAnalysis, ScreenContext, WhisperResponse, AutomationAction } from './types';
+
+const { Readable } = require('stream');
 
 export class OpenAIService {
     private client: AzureOpenAI;
 
     constructor() {
-        this.client = new AzureOpenAI({ dangerouslyAllowBrowser: true,
+        this.client = new AzureOpenAI({
+            dangerouslyAllowBrowser: true,
             endpoint: process.env.AZURE_OPEN_AI_ENDPOINT || '',
-            deployment: process.env.AZURE_OPEN_AI_IMAGE_MODEL || '',
-            apiVersion: process.env.OPENAI_API_VERSION || '',
+            apiVersion: process.env.OPENAI_API_VERSION || '2024-02-15-preview',
             apiKey: process.env.AZURE_OPEN_AI_KEY || ''
         });
     }
 
-    async analyzeScreen(imagePath: string): Promise<ScreenAnalysis> {
-        const imageBuffer = fs.readFileSync(imagePath);
-        const base64Image = imageBuffer.toString('base64');
+    async transcribeAudio(audioBlob: Blob): Promise<WhisperResponse> {
+        try {
+            // Convert Blob to ArrayBuffer
+            const arrayBuffer = await audioBlob.arrayBuffer();
+
+            // Convert Buffer to a Readable stream
+            const buffer = Buffer.from(arrayBuffer);
+            const stream = new Readable();
+            stream.push(buffer);
+            stream.push(null); // Signal the end of the stream
+
+            const response = await this.client.audio.transcriptions.create({
+                file: stream,
+                model: process.env.AZURE_OPEN_AI_WHISPER_MODEL || 'whisper-1',
+                language: 'en',
+                response_format: 'verbose_json'
+            }); return {
+                text: response.text,
+                //@ts-ignore
+                segments: response.segments?.map(seg => ({
+                    text: seg.text,
+                    start: seg.start,
+                    end: seg.end
+                })) || []
+            };
+        } catch (error) {
+            console.error('Error in transcribeAudio:', error);
+            throw new Error('Failed to transcribe audio');
+        }
+    }
+
+    async analyzeScreenWithContext(context: ScreenContext): Promise<AutomationAction> {
+        try {
             const response = await this.client.chat.completions.create({
-            model: process.env.AZURE_OPEN_AI_LLM_MODEL || '',
+                model: process.env.AZURE_OPEN_AI_VISION_MODEL || '',
                 messages: [
+                    {
+                        role: 'system',
+                        content: `You are an AI that analyzes screenshots and voice commands to determine user intentions for automation.
+          You should identify UI elements and return specific actions in JSON format.
+          Focus on the area near the cursor position when relevant.`
+                    },
                     {
                         role: 'user',
                         content: [
-                { type: 'text', text: 'Analyze this screenshot and identify all interactive elements (buttons, text fields, etc). Return their locations and identifiers.' },
-                { type: 'image_url', image_url: { url: `data:image/png;base64,${base64Image}` } }
-            ],
+                            {
+                                type: 'text',
+                                text: `Analyze this screenshot with the following context:
+          Voice Command: "${context.transcription}"
+          Cursor Position: x=${context.cursorPosition.x}, y=${context.cursorPosition.y}
+
+          Identify the most likely action based on the voice command and cursor position.
+          Return in format: {
+            "type": "click|type|move",
+            "identifier": "element-id or descriptive name",
+            "value": "text to type (for type actions)",
+            "confidence": 0-1,
+            "bounds": {"x": number, "y": number, "width": number, "height": number}
+          }`
+                            },
                         },
+                            {
+                                type: 'image_url',
+                                image_url: {
+                                    url: `data:image/png;base64,${context.screenshot}`
+                                }
+                            }
+                        ]
+                    }
                 ],
+                max_tokens: 500,
+                temperature: 0.3
             });
 
-        return JSON.parse(response.choices[0].message.content || '{}');
+            const result = JSON.parse(response.choices[0].message.content || '{}');
+            return result;
+        } catch (error) {
+            console.error('Error in analyzeScreenWithContext:', error);
+            throw new Error('Failed to analyze screen context');
+        }
+    }
+
+    async analyzeScreen(screenshot: string): Promise<ScreenAnalysis> {
+        try {
+            const response = await this.client.chat.completions.create({
+                model: process.env.AZURE_OPEN_AI_VISION_MODEL || '',
+                messages: [
+                    {
+                        role: 'system',
+                        content: 'You are an AI that analyzes screenshots to identify interactive UI elements and their properties.'
+                    },
+                    {
+                        role: 'user',
+                        content: [
+                            {
+                                type: 'text',
+                                text: `Analyze this screenshot and identify all interactive elements (buttons, text fields, dropdowns, etc).
+          For each element, provide:
+          - Type of element
+          - Identifier or descriptive name
+          - Location and size
+          - Any visible text or labels
+          - State (focused, disabled, etc)
+
+          Return in format: {
+            "elements": [{
+              "type": "button|input|dropdown|etc",
+              "identifier": "element-id or descriptive name",
+              "bounds": {"x": number, "y": number, "width": number, "height": number},
+              "text": "visible text",
+              "state": {"focused": boolean, "disabled": boolean}
+            }]
+          }`
+                            },
+                            {
+                                type: 'image_url',
+                                image_url: {
+                                    url: `data:image/png;base64,${screenshot}`
+                                }
+                            }
+                        ]
+                    }
+                ],
+                max_tokens: 1000,
+                temperature: 0.3
+            });
+
+            const result = JSON.parse(response.choices[0].message.content || '{}');
+            return {
+                elements: result.elements || [],
+                timestamp: Date.now()
+            };
+        } catch (error) {
+            console.error('Error in analyzeScreen:', error);
+            throw new Error('Failed to analyze screen');
+        }
     }
 }
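One caveat on the transcription upload: a bare Readable built from a buffer carries no filename, which the upload layer normally needs to pick the multipart part name and file extension. Assuming the openai v4 SDK, its toFile() helper is the usual way to attach one; a sketch (not part of the commit, 'recording.wav' is a hypothetical name):

    import { toFile } from 'openai';

    async function blobToUploadable(audioBlob: Blob) {
        const buffer = Buffer.from(await audioBlob.arrayBuffer());
        // Whisper infers the container format from the extension.
        return toFile(buffer, 'recording.wav');
    }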
src/services/recorder.service.ts

@@ -1,41 +1,192 @@
-import { screen, ipcMain } from 'electron';
-import { AutomationEvent, ScreenAnalysis } from './types';
-import dotenv from 'dotenv';
-dotenv.config();
-import { OpenAIService } from './openai.service';
+import { ipcRenderer } from 'electron';
+import { AutomationEvent, ScreenAnalysis, WhisperResponse } from '../services/types';
+import { OpenAIService } from '../services/openai.service';
+const _ = require('lodash');
+import * as path from 'path';
+import * as fs from 'fs';
 
 export class RecorderService {
     private events: AutomationEvent[] = [];
     private recording: boolean = false;
     private openAIService: OpenAIService;
     private currentScreenshot: string = '';
+    private lastTranscription: string = '';
+    private recordingProcess: any = null;
+    private tempDir: string;
+    private currentAudioFile: string = '';
+    private silenceTimer: NodeJS.Timeout | null = null;
+    private isProcessingAudio: boolean = false;
 
     constructor() {
         this.openAIService = new OpenAIService();
+        this.tempDir = path.join(process.cwd(), 'temp_recordings');
+        if (!fs.existsSync(this.tempDir)) {
+            fs.mkdirSync(this.tempDir, { recursive: true });
+        }
     }
 
     public async startRecording() {
+        try {
             this.recording = true;
             this.events = [];
-        this.requestScreenshot();
+            await this.setupAudioRecording();
+            await this.requestScreenshot();
+            ipcRenderer.on('keyboard-event', this.keyboardHandleEvent); // Listen for keyboard events
+        } catch (error) {
+            console.error('Failed to start recording:', error);
+            this.recording = false;
+            throw error;
+        }
     }
 
-    public stopRecording(): string {
+    private async setupAudioRecording() {
+        try {
+            this.recordingProcess = await ipcRenderer.invoke('start-audio-recording');
+            ipcRenderer.on('audio-level', this.handleAudioLevel);
+            ipcRenderer.on('audio-chunk', this.handleAudioChunk);
+        } catch (error) {
+            console.error('Error setting up audio recording:', error);
+            throw new Error(`Failed to setup audio recording: ${error.message}`);
+        }
+    }
+
+    private handleAudioLevel = _.debounce(async (_: any, level: number) => {
+        if (!this.recording) return;
+
+        const SILENCE_THRESHOLD = 0.01;
+        const SILENCE_DURATION = 1000;
+
+        if (level < SILENCE_THRESHOLD) {
+            if (!this.silenceTimer && !this.isProcessingAudio) {
+                this.silenceTimer = setTimeout(async () => {
+                    if (this.recording) {
+                        await this.processSilence();
+                    }
+                }, SILENCE_DURATION);
+            }
+        } else {
+            if (this.silenceTimer) {
+                clearTimeout(this.silenceTimer);
+                this.silenceTimer = null;
+            }
+        }
+    }, 100);
+
+    private handleAudioChunk = async (_: any, chunk: Buffer) => {
+        if (!this.recording) return;
+
+        try {
+            const audioFilePath = path.join(this.tempDir, `audio-${Date.now()}.wav`);
+            fs.writeFileSync(audioFilePath, chunk);
+
+            if (this.silenceTimer) {
+                clearTimeout(this.silenceTimer);
+                this.silenceTimer = null;
+                await this.processAudioFile(audioFilePath);
+            }
+        } catch (error) {
+            console.error('Error handling audio chunk:', error);
+        }
+    };
+
+    private async processSilence() {
+        if (this.isProcessingAudio) return;
+
+        this.isProcessingAudio = true;
+        try {
+            const audioFilePath = await ipcRenderer.invoke('save-audio-chunk');
+            if (audioFilePath) {
+                this.currentAudioFile = audioFilePath;
+                await this.processAudioFile(audioFilePath);
+                await this.requestScreenshot();
+            }
+        } catch (error) {
+            console.error('Error processing silence:', error);
+        } finally {
+            this.isProcessingAudio = false;
+        }
+    }
+
+    private async processAudioFile(audioFilePath: string) {
+        try {
+            const audioBuffer = fs.readFileSync(audioFilePath);
+            const transcription = await this.openAIService.transcribeAudio(
+                new Blob([audioBuffer], { type: 'audio/wav' })
+            );
+
+            if (transcription.text.trim()) {
+                await this.processTranscription(transcription);
+            }
+
+            fs.unlinkSync(audioFilePath);
+        } catch (error) {
+            console.error('Error processing audio file:', error);
+        }
+    }
+
+    private async processTranscription(transcription: WhisperResponse) {
+        this.lastTranscription = transcription.text;
+
+        const analysis = await this.openAIService.analyzeScreenWithContext({
+            screenshot: this.currentScreenshot,
+            transcription: this.lastTranscription,
+            cursorPosition: await ipcRenderer.invoke('get-cursor-position')
+        });
+
+        if (analysis) {
+            this.events.push({
+                type: analysis.type,
+                identifier: analysis.identifier,
+                value: analysis.value,
+                timestamp: Date.now(),
+                narration: this.lastTranscription
+            });
+        }
+    }
+
+    public async stopRecording(): Promise<string> {
         this.recording = false;
+
+        if (this.silenceTimer) {
+            clearTimeout(this.silenceTimer);
+            this.silenceTimer = null;
+        }
+
+        await ipcRenderer.invoke('stop-audio-recording');
+        ipcRenderer.removeListener('audio-level', this.handleAudioLevel);
+        ipcRenderer.removeListener('audio-chunk', this.handleAudioChunk);
+        ipcRenderer.removeListener('keyboard-event', this.keyboardHandleEvent); // Remove keyboard listener
+
+        if (this.currentAudioFile && fs.existsSync(this.currentAudioFile)) {
+            fs.unlinkSync(this.currentAudioFile);
+        }
+
         return this.generateBasicCode();
     }
 
-    private requestScreenshot() {
-        // Notify renderer process to capture a screenshot
-        const allWebContents = screen.getAllDisplays();
-        allWebContents.forEach((webContents) => {
-            //@ts-ignores
-            webContents.send('request-screenshot');
-        });
+    private async requestScreenshot() {
+        try {
+            const sources = await ipcRenderer.invoke('get-screenshot');
+            const screenSource = sources[0];
+            await this.screenshotHandleEvent(null, screenSource.thumbnail);
+        } catch (error) {
+            console.error('Error capturing screenshot:', error);
+        }
     }
 
-    public async screenshotHandleEvent (_: any, screenshot: string) {
-        this.currentScreenshot = screenshot; // Store the screenshot as a base64 image
+    public async screenshotHandleEvent(_: any, screenshot: string) {
+        this.currentScreenshot = screenshot;
+    }
+
+    public async keyboardHandleEvent(_: any, event: KeyboardEvent) {
+        if (!this.recording) return;
+
+        this.events.push({
+            type: 'type',
+            identifier: event.key,
+            timestamp: Date.now(),
+            narration: this.lastTranscription
+        });
     }
 
     public async mouseHandleEvent(_: any, event: any) {
         if (!this.recording)
@@ -49,50 +200,43 @@ export class RecorderService {
                 type: 'click',
                 identifier: element.identifier,
                 timestamp: Date.now(),
-            });
-        }
-    }
-
-    public async keyboardHandleEvent(_: any, event: any) {
-        if (!this.recording) return;
-
-        const analysis = await this.openAIService.analyzeScreen(this.currentScreenshot);
-        const focusedElement = this.findFocusedElement(analysis);
-
-        if (focusedElement) {
-            this.events.push({
-                type: 'type',
-                identifier: focusedElement.identifier,
-                value: event.key,
-                timestamp: Date.now(),
+                narration: this.lastTranscription
             });
         }
     }
 
     private findElementAtPosition(analysis: ScreenAnalysis, x: number, y: number) {
+        //@ts-nocheck
         return analysis.elements.find((element) => {
             const bounds = element.bounds;
-            return x >= bounds.x && x <= bounds.x + bounds.width && y >= bounds.y && y <= bounds.y + bounds.height;
+            return x >= bounds.x &&
+                x <= bounds.x + bounds.width &&
+                y >= bounds.y &&
+                y <= bounds.y + bounds.height;
         });
     }
 
-    private findFocusedElement(analysis: ScreenAnalysis) {
-        //@ts-ignore
-        return analysis.elements.find((element) => element.focused);
-    }
-
     private generateBasicCode(): string {
         let basicCode = '10 REM BotDesktop Automation Script\n';
         let lineNumber = 20;
 
         for (const event of this.events) {
+            basicCode += `${lineNumber} REM ${event.narration}\n`;
+            lineNumber += 10;
+
             switch (event.type) {
                 case 'click':
                     basicCode += `${lineNumber} CLICK "${event.identifier}"\n`;
                     break;
+                case 'type':
+                    basicCode += `${lineNumber} TYPE "${event.identifier}"\n`;
+                    break;
                 case 'type':
                     basicCode += `${lineNumber} TYPE "${event.identifier}" "${event.value}"\n`;
                     break;
+                case 'move':
+                    basicCode += `${lineNumber} MOVE "${event.identifier}"\n`;
+                    break;
             }
             lineNumber += 10;
        }
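To make generateBasicCode() concrete: two recorded events (hypothetical identifiers and narrations) produce the script below. Note that the new switch now contains two case 'type': labels; the first match wins in JavaScript, so the value-carrying TYPE branch is unreachable.

    const events = [
        { type: 'click', identifier: 'save-button', timestamp: 0, narration: 'click save' },
        { type: 'type', identifier: 'name-field', value: 'report.txt', timestamp: 1, narration: 'name it report' }
    ];
    // generateBasicCode() over these events yields:
    // 10 REM BotDesktop Automation Script
    // 20 REM click save
    // 30 CLICK "save-button"
    // 40 REM name it report
    // 50 TYPE "name-field"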
src/services/types.ts

@@ -1,11 +1,37 @@
+export interface AutomationAction {
+    type: 'click' | 'type' | 'move';
+    identifier: string;
+    value?: string;
+    confidence: number;
+    bounds: {
+        x: number;
+        y: number;
+        width: number;
+        height: number;
+    };
+}
+
 export interface AutomationEvent {
     type: 'click' | 'type' | 'move';
     identifier: string;
     value?: string;
     timestamp: number;
+    narration: string;
+}
+
+export interface WhisperResponse {
+    text: string;
+    segments: any;
+}
+
+export interface ScreenContext {
+    screenshot: string;
+    transcription: string;
+    cursorPosition: { x: number, y: number };
 }
 
 export interface ScreenAnalysis {
+    timestamp: number,
     elements: {
         identifier: string;
         type: string;
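An example value satisfying the new AutomationAction interface, in the shape the vision prompt in openai.service asks the model to return (hypothetical identifier and numbers):

    import { AutomationAction } from './types';

    const action: AutomationAction = {
        type: 'click',
        identifier: 'submit-button',
        confidence: 0.92,
        bounds: { x: 412, y: 310, width: 96, height: 32 }
    };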
webpack.config.js

@@ -2,6 +2,7 @@ const path = require('path');
 const HtmlWebpackPlugin = require('html-webpack-plugin');
 
 module.exports = {
+    devtool: 'source-map',
     entry: './src/renderer/index.tsx',
     target: 'electron-renderer',
     module: {