botserver/packages/llm.gblib/services/ImageServices.ts

112 lines
5.6 KiB
TypeScript
Raw Normal View History

2023-07-23 10:59:59 -03:00
/*****************************************************************************\
| ® |
| |
| |
| |
| |
2023-07-23 10:59:59 -03:00
| |
| General Bots Copyright (c) pragmatismo.cloud. All rights reserved. |
2023-07-23 10:59:59 -03:00
| Licensed under the AGPL-3.0. |
| |
| According to our dual licensing model, this program can be used either |
| under the terms of the GNU Affero General Public License, version 3, |
2023-07-23 10:59:59 -03:00
| or under a proprietary license. |
| |
| The texts of the GNU Affero General Public License with an additional |
| permission and of our proprietary license can be found at and |
| in the LICENSE file you have received along with this program. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY, without even the implied warranty of |
2023-07-23 10:59:59 -03:00
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU Affero General Public License for more details. |
| |
| "General Bots" is a registered trademark of pragmatismo.cloud. |
2023-07-23 10:59:59 -03:00
| The licensing of the program under the AGPLv3 does not imply a |
| trademark license. Therefore any rights, title and interest in |
2023-07-23 10:59:59 -03:00
| our trademarks remain entirely with us. |
| |
\*****************************************************************************/
'use strict';
2024-08-16 10:43:15 -03:00
import { GBMinInstance } from 'botlib';
2024-08-18 17:51:03 -03:00
import OpenAI from 'openai';
2024-08-16 10:43:15 -03:00
import { AzureKeyCredential } from '@azure/core-auth';
import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords';
2024-09-06 15:30:03 -03:00
import path from 'path';
2024-08-16 10:43:15 -03:00
import { GBServer } from '../../../src/app.js';
2024-09-07 18:13:36 -03:00
import fs from 'fs/promises';
2024-08-16 10:43:15 -03:00
import urlJoin from 'url-join';
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService';
import { GBLogEx } from '../../core.gbapp/services/GBLogEx';
import { GBUtil } from '../../../src/util';
2024-08-16 10:43:15 -03:00
/**
 * Image processing services of conversation to be called by BASIC.
 *
 * Both methods read their credentials from the bot instance parameters
 * 'Azure Open AI Key' and 'Azure Open AI Endpoint', and both resolve to
 * `undefined` (without calling the API) when no key is configured.
 */
export class ImageServices {
  /**
   * Generates a single 1024x1024 image from a text prompt, downloads it and
   * stores a copy under `work/<gbai path>/cache`.
   *
   * @param pid Process identifier resolved through `DialogKeywords.getProcessInfo`.
   * @param prompt Text describing the image to generate.
   * @returns `{ localName, url }` where `localName` is the path of the saved
   *   PNG and `url` is the address returned by the image API; `undefined`
   *   when 'Azure Open AI Key' is not set.
   * @throws Error when the API returns no image URL or the download fails.
   */
  public async getImageFromPrompt({ pid, prompt }) {
    const { min } = await DialogKeywords.getProcessInfo(pid);

    const azureOpenAIKey = await min.core.getParam(min.instance, 'Azure Open AI Key', null);
    if (!azureOpenAIKey) {
      return undefined;
    }
    const azureOpenAIEndpoint = await min.core.getParam(min.instance, 'Azure Open AI Endpoint', null);

    // NOTE(review): the 'Azure Open Image Model' parameter used to be read here
    // but was never sent with the request, so the API default model is used.
    // Confirm whether it should be forwarded as `model`.
    const client = new OpenAI({ apiKey: azureOpenAIKey, baseURL: azureOpenAIEndpoint });
    const response = await client.images.generate({
      prompt,
      n: 1,
      size: '1024x1024'
    });

    const url = response.data?.[0]?.url;
    if (!url) {
      throw new Error('Image generation returned no image URL.');
    }

    // Fail loudly instead of saving an HTTP error page as if it were a PNG.
    const res = await fetch(url);
    if (!res.ok) {
      throw new Error(`Failed to download generated image: HTTP ${res.status} ${res.statusText}`);
    }
    const imageBytes = Buffer.from(await res.arrayBuffer());

    const gbaiName = GBUtil.getGBAIPath(min.botId);
    const localName = path.join(
      'work',
      gbaiName,
      'cache',
      `DALL-E${GBAdminService.getRndReadableIdentifier()}.png`
    );

    // The cache folder may not exist yet on a fresh deployment.
    await fs.mkdir(path.dirname(localName), { recursive: true });
    await fs.writeFile(localName, imageBytes);

    GBLogEx.info(min, `DALL-E image generated at ${url}.`);

    return { localName, url };
  }

  /**
   * Asks the GPT-4 chat model for a short caption of the image found at `imageUrl`.
   *
   * @param pid Process identifier resolved through `DialogKeywords.getProcessInfo`.
   * @param imageUrl Address of the image to be captioned.
   * @returns `{ caption }` with the trimmed model answer (empty string when the
   *   model returns no text); `undefined` when 'Azure Open AI Key' or
   *   `imageUrl` is missing.
   */
  public async getCaptionForImage({ pid, imageUrl }) {
    const { min } = await DialogKeywords.getProcessInfo(pid);

    const azureOpenAIKey = await min.core.getParam(min.instance, 'Azure Open AI Key', null);
    if (!azureOpenAIKey || !imageUrl) {
      return undefined;
    }
    const azureOpenAIEndpoint = await min.core.getParam(min.instance, 'Azure Open AI Endpoint', null);
    const azureOpenAITextModel = 'gpt-4';

    const client = new OpenAI({ apiKey: azureOpenAIKey, baseURL: azureOpenAIEndpoint });

    // NOTE(review): the URL is passed as plain text, so the model does not
    // receive the image itself. A vision-capable model with an `image_url`
    // content part would be needed for a caption based on the actual pixels.
    const prompt = `Provide a descriptive caption for the image at the following URL: ${imageUrl}`;

    // 'gpt-4' is a chat model, so it has to go through the chat completions
    // endpoint. The SDK returns the payload directly (there is no `.data`
    // wrapper around `choices`).
    const response = await client.chat.completions.create({
      model: azureOpenAITextModel,
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 50
    });

    const caption = response.choices[0]?.message?.content?.trim() ?? '';
    GBLogEx.info(min, `Generated caption: ${caption}`);

    return { caption };
  }
}