// botui/dist/services/openai.service.js

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.OpenAIService = void 0;
const openai_1 = require("openai");
const { Readable } = require('stream');
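/**
 * Thin wrapper around the Azure OpenAI client covering the three calls this
 * UI needs: Whisper audio transcription and two vision-model screenshot
 * analyses. Compiled CommonJS output (note the dist/ path in the filename).
 */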
class OpenAIService {
    constructor() {
        this.client = new openai_1.AzureOpenAI({
            // The SDK refuses to run in browser-like environments without this
            // flag because the API key is exposed to the client runtime; it is
            // presumably acceptable here only because this bundle runs inside
            // the app itself, not on a public web page.
            dangerouslyAllowBrowser: true,
            endpoint: process.env.AZURE_OPEN_AI_ENDPOINT || '',
            apiVersion: process.env.OPENAI_API_VERSION || '2024-02-15-preview',
            apiKey: process.env.AZURE_OPEN_AI_KEY || ''
        });
    }
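    /**
     * Transcribe a recorded audio Blob with the configured Whisper deployment.
     * @param {Blob} audioBlob - Raw audio, e.g. from MediaRecorder.
     * @returns {Promise<{text: string, segments: {text: string, start: number, end: number}[]}>}
     */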
    async transcribeAudio(audioBlob) {
        try {
            // Convert the Blob to a Buffer, then expose it as a Readable stream
            const arrayBuffer = await audioBlob.arrayBuffer();
            const buffer = Buffer.from(arrayBuffer);
            const stream = new Readable();
            stream.push(buffer);
            stream.push(null); // Signal the end of the stream
            const response = await this.client.audio.transcriptions.create({
                // toFile() gives the upload a filename, which the SDK's
                // multipart encoder requires; 'audio.webm' is an assumed
                // container format -- adjust the extension to whatever the
                // recorder actually produces.
                file: await openai_1.toFile(stream, 'audio.webm'),
                model: process.env.AZURE_OPEN_AI_WHISPER_MODEL || 'whisper-1',
                language: 'en',
                response_format: 'verbose_json'
            });
            return {
                text: response.text,
                // verbose_json includes per-segment timestamps alongside the text
                segments: response.segments?.map(seg => ({
                    text: seg.text,
                    start: seg.start,
                    end: seg.end
                })) || []
            };
        }
        catch (error) {
            console.error('Error in transcribeAudio:', error);
            throw new Error('Failed to transcribe audio');
        }
    }
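    /**
     * Combine a screenshot, a transcribed voice command, and the cursor
     * position into one vision prompt, and ask the model for the single
     * automation action it implies.
     * @param {{screenshot: string, transcription: string, identifier: string,
     *          cursorPosition: {x: number, y: number}}} context
     *        screenshot is a base64-encoded PNG without the data-URL prefix.
     * @returns {Promise<object>} The parsed action object described in the prompt.
     */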
    async analyzeScreenWithContext(context) {
        try {
            const response = await this.client.chat.completions.create({
                model: process.env.AZURE_OPEN_AI_VISION_MODEL || '',
                messages: [
                    {
                        role: 'system',
                        content: `You are an AI that analyzes screenshots and voice commands to determine user intentions for automation.
You should identify UI elements and return specific actions in JSON format.
Focus on the area near the field ${context.identifier}.`
                    },
                    {
                        role: 'user',
                        content: [
                            {
                                type: 'text',
                                text: `Analyze this screenshot with the following context:
Voice Command: "${context.transcription}"
Cursor Position: x=${context.cursorPosition.x}, y=${context.cursorPosition.y}
Identify the most likely action based on the voice command and cursor position.
Return in format: {
    "type": "click|type|move",
    "identifier": "element-id or descriptive name",
    "value": "text to type (for type actions)",
    "confidence": 0-1,
    "bounds": {"x": number, "y": number, "width": number, "height": number}
}`
                            },
                            {
                                type: 'image_url',
                                image_url: {
                                    url: `data:image/png;base64,${context.screenshot}`
                                }
                            }
                        ]
                    }
                ],
                max_tokens: 500,
                temperature: 0.3
            });
            // Assumes the model replies with bare JSON (no markdown fences);
            // a fenced reply makes JSON.parse throw into the catch below.
            const result = JSON.parse(response.choices[0].message.content || '{}');
            return result;
        }
        catch (error) {
            console.error('Error in analyzeScreenWithContext:', error);
            throw new Error('Failed to analyze screen context');
        }
    }
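    /**
     * Ask the vision model to inventory every interactive element visible in
     * a screenshot.
     * @param {string} screenshot - Base64-encoded PNG without the data-URL prefix.
     * @returns {Promise<{elements: object[], timestamp: number}>}
     */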
    async analyzeScreen(screenshot) {
        try {
            const response = await this.client.chat.completions.create({
                model: process.env.AZURE_OPEN_AI_VISION_MODEL || '',
                messages: [
                    {
                        role: 'system',
                        content: 'You are an AI that analyzes screenshots to identify interactive UI elements and their properties.'
                    },
                    {
                        role: 'user',
                        content: [
                            {
                                type: 'text',
                                text: `Analyze this screenshot and identify all interactive elements (buttons, text fields, dropdowns, etc).
For each element, provide:
- Type of element
- Identifier or descriptive name
- Location and size
- Any visible text or labels
- State (focused, disabled, etc)
Return in format: {
    "elements": [{
        "type": "button|input|dropdown|etc",
        "identifier": "element-id or descriptive name",
        "bounds": {"x": number, "y": number, "width": number, "height": number},
        "text": "visible text",
        "state": {"focused": boolean, "disabled": boolean}
    }]
}`
                            },
                            {
                                type: 'image_url',
                                image_url: {
                                    url: `data:image/png;base64,${screenshot}`
                                }
                            }
                        ]
                    }
                ],
                max_tokens: 1000,
                temperature: 0.3
            });
            const result = JSON.parse(response.choices[0].message.content || '{}');
            return {
                elements: result.elements || [],
                timestamp: Date.now()
            };
        }
        catch (error) {
            console.error('Error in analyzeScreen:', error);
            throw new Error('Failed to analyze screen');
        }
    }
}
exports.OpenAIService = OpenAIService;
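
// Usage sketch (illustrative only -- the `recorder` wiring and the capture
// state variables below are assumptions, not part of this module):
//
//   const { OpenAIService } = require('./openai.service');
//   const service = new OpenAIService();
//
//   recorder.ondataavailable = async (event) => {
//       const { text } = await service.transcribeAudio(event.data);
//       const action = await service.analyzeScreenWithContext({
//           screenshot: latestScreenshotBase64,   // hypothetical capture state
//           transcription: text,
//           identifier: focusedFieldName,         // hypothetical focus tracker
//           cursorPosition: { x: 0, y: 0 }
//       });
//       console.log('Proposed action:', action);
//   };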