Introduction#
Build a simple GenAI app using Next.js and Amazon Bedrock, covering:
- chat with a simple prompt
- simple vector search
- simple RAG
- image generation
- image analysis
Setup NextJS#
Let's create a new Next.js project:
```bash
npx create-next-app@latest
```
Then install the dependencies. Here is the package.json:
package.json
{"name": "next-prisma-hello","version": "0.1.0","private": true,"scripts": {"dev": "next dev","build": "next build","start": "next start","lint": "next lint"},"dependencies": {"@aws-sdk/client-bedrock-runtime": "^3.490.0","@aws-sdk/client-s3": "^3.525.0","@aws-sdk/credential-provider-node": "^3.525.0","@aws-sdk/credential-providers": "^3.525.0","@aws-sdk/s3-request-presigner": "^3.525.0","@opensearch-project/opensearch": "^2.5.0","@prisma/client": "^5.8.0","ai": "^2.2.33","aws-sdk": "^2.1569.0","i": "^0.3.7","langchain": "^0.1.25","next": "14.0.4","npm": "^10.5.0","package.json": "^2.0.1","react": "^18","react-dom": "^18"},"devDependencies": {"@types/node": "^20","@types/react": "^18","@types/react-dom": "^18","autoprefixer": "^10.0.1","eslint": "^8","eslint-config-next": "14.0.4","postcss": "^8","prisma": "^5.8.0","tailwindcss": "^3.3.0","typescript": "^5"}}
Project structure:

```
|--app
   |--page.tsx
   |--api
      |--chat
         |--route.ts
      |--rag
         |--route.js
   |--image
      |--page.tsx
   |--rag
      |--page.tsx
   |--aoss
      |--page.tsx
      |--action.ts
|--cdk
   |--bin
      |--cdk.ts
   |--lib
      |--cdk-stack.ts
|--next.config.js
|--package.json
|--Dockerfile
|--.dockerignore
|--build.py
|--.env
```
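The code in the following sections reads a few environment variables. A minimal .env sketch with placeholder values (the variable names come from the code below; the values are assumptions you must replace):

```bash
REGION=us-east-1
BUCKET=your-bucket-name
AOSS_URL=https://your-collection-id.us-east-1.aoss.amazonaws.com
```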
Chat Function#
- Simple conversation memory using the Vercel AI SDK and Bedrock
- Streaming chat responses
- Frontend using the useChat React hook

Let's implement route.ts for chat:
```ts
import {
  BedrockRuntime,
  InvokeModelWithResponseStreamCommand
} from '@aws-sdk/client-bedrock-runtime'
import { AWSBedrockAnthropicStream, StreamingTextResponse } from 'ai'
import { experimental_buildAnthropicPrompt } from 'ai/prompts'

// IMPORTANT! Set the runtime to edge
// export const runtime = "edge";

const bedrockClient = new BedrockRuntime({
  region: 'us-east-1'
})

export async function POST(req: Request) {
  // Extract the messages from the body of the request
  const { messages } = await req.json()

  // Ask Claude for a streaming chat completion given the prompt
  const bedrockResponse = await bedrockClient.send(
    new InvokeModelWithResponseStreamCommand({
      modelId: 'anthropic.claude-v2',
      contentType: 'application/json',
      accept: 'application/json',
      body: JSON.stringify({
        prompt: experimental_buildAnthropicPrompt(messages),
        max_tokens_to_sample: 2048
      })
    })
  )

  // Convert the response into a friendly text-stream
  const stream = AWSBedrockAnthropicStream(bedrockResponse)

  // Respond with the stream
  return new StreamingTextResponse(stream)
}
```
The useChat hook wires up the form; pressing Enter in the input submits the chat:
```tsx
const { messages, input, handleInputChange, handleSubmit } = useChat({
  api: './api/chat'
})

<form onSubmit={handleSubmit}>
  <input
    className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl"
    value={input}
    placeholder="Say something..."
    onChange={handleInputChange}
  />
</form>
```
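With a single input inside the form, Enter submits natively. If you swap in a textarea, you would have to handle the Enter key yourself. A minimal sketch (the cast to any just reuses the hook's form submit handler and is an assumption, not part of the SDK's typed API):

```tsx
// hypothetical textarea variant: Enter submits, Shift+Enter inserts a newline
<textarea
  value={input}
  onChange={handleInputChange}
  onKeyDown={event => {
    if (event.key === 'Enter' && !event.shiftKey) {
      event.preventDefault()
      handleSubmit(event as any)
    }
  }}
/>
```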
Image Function#
Then let's implement a server function for generating images. It will:
- generate an image
- upload it to S3
- generate a signed URL
```ts
'use server'
import {
  GetObjectCommand,
  S3Client,
  PutObjectCommand
} from '@aws-sdk/client-s3'
import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
import {
  BedrockRuntime,
  InvokeModelCommand
} from '@aws-sdk/client-bedrock-runtime'

const bedrock = new BedrockRuntime({ region: 'us-east-1' })
const s3Client = new S3Client({ region: 'us-east-1' })

const genImage = async ({ prompt }: { prompt: string }) => {
  let url = ''
  const body = {
    text_prompts: [{ text: prompt, weight: 1 }],
    seed: 3,
    cfg_scale: 10,
    samples: 1,
    steps: 50,
    style_preset: 'anime',
    height: 1024,
    width: 1024
  }
  const command = new InvokeModelCommand({
    body: JSON.stringify(body),
    modelId: 'stability.stable-diffusion-xl-v1',
    contentType: 'application/json',
    accept: 'image/png'
  })
  try {
    const imageName = 'sample' + Date.now().toString() + '.png'
    const key = `next-vercel-ai/${imageName}`
    const response = await bedrock.send(command)
    // upload to s3 input location
    await s3Client.send(
      new PutObjectCommand({
        Bucket: process.env.BUCKET,
        Key: key,
        Body: response['body']
      })
    )
    // generate signed url
    const commandGetUrl = new GetObjectCommand({
      Bucket: process.env.BUCKET,
      Key: key
    })
    url = await getSignedUrl(s3Client as any, commandGetUrl as any, {
      expiresIn: 3600
    })
    console.log(url)
  } catch (error) {
    console.log(error)
  }
  return url
}

export { genImage }
```
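Note that because the command sets accept: 'image/png', Bedrock should return the raw PNG bytes in the response body (rather than base64-encoded JSON), which is why the body can be passed straight to PutObjectCommand without decoding.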
OpenSearch Permission#
[!IMPORTANT]
Please update the data access policy in OpenSearch Serverless to grant principals (such as a Lambda execution role, a CodeBuild role, or an IAM user) permission to query and index collections.
- AppRunner instance role
- OpenSearch data access policy
Here is the instance role that App Runner uses to access Bedrock, S3, and the AOSS collection:
```ts
const instanceRole = new aws_iam.Role(this, 'InstanceRoleForApprunerBedrock', {
  assumedBy: new aws_iam.ServicePrincipal('tasks.apprunner.amazonaws.com'),
  roleName: 'InstanceRoleForApprunnerBedrock'
})

instanceRole.addToPolicy(
  new aws_iam.PolicyStatement({
    effect: Effect.ALLOW,
    resources: [
      'arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-v2',
      'arn:aws:bedrock:us-east-1::foundation-model/stability.stable-diffusion-xl-v1',
      'arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-text-v1'
    ],
    actions: ['bedrock:InvokeModel', 'bedrock:InvokeModelWithResponseStream']
  })
)

instanceRole.addToPolicy(
  new aws_iam.PolicyStatement({
    effect: Effect.ALLOW,
    resources: [`arn:aws:s3:::${props.bucket}/*`],
    actions: ['s3:PutObject', 's3:GetObject']
  })
)

instanceRole.addToPolicy(
  new aws_iam.PolicyStatement({
    effect: Effect.ALLOW,
    resources: [props.aossCollectionArn],
    actions: ['aoss:APIAccessAll']
  })
)
```
We also need to configure the OpenSearch data access policy
[{"Rules": [{"Resource": ["collection/demo"],"Permission": ["aoss:CreateCollectionItems","aoss:DeleteCollectionItems","aoss:UpdateCollectionItems","aoss:DescribeCollectionItems"],"ResourceType": "collection"},{"Resource": ["index/demo/*"],"Permission": ["aoss:CreateIndex","aoss:DeleteIndex","aoss:UpdateIndex","aoss:DescribeIndex","aoss:ReadDocument","aoss:WriteDocument"],"ResourceType": "index"}],"Principal": ["arn:aws:iam::$ACCOUNT_ID:role/InstanceRoleForApprunnerBedrock"],"Description": "Rule 1"}]
OpenSearch Client#
- SignatureV4
- Index, Query
- Bulk data
It is possible to comment out the aws-sdk v2 fallback, as in this copy of the signer script:
AwsSigv4Signer.js
```js
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

'use strict'

const Connection = require('../Connection')
const Transport = require('../Transport')
const aws4 = require('aws4')
const AwsSigv4SignerError = require('./errors')
const crypto = require('crypto')
const { toMs } = Transport.internals

const getAwsSDKCredentialsProvider = async () => {
  // First try V3
  try {
    const awsV3 = await import('@aws-sdk/credential-provider-node')
    if (typeof awsV3.defaultProvider === 'function') {
      return awsV3.defaultProvider()
    }
  } catch (err) {
    // Ignore
  }
  // try {
  //   const awsV2 = await import('aws-sdk');
  //   if (awsV2.default && typeof awsV2.default.config.getCredentials === 'function') {
  //     return () =>
  //       new Promise((resolve, reject) => {
  //         awsV2.default.config.getCredentials((err, credentials) => {
  //           if (err) {
  //             reject(err);
  //           } else {
  //             resolve(credentials);
  //           }
  //         });
  //       });
  //   }
  // } catch (err) {
  //   // Ignore
  // }
  throw new AwsSigv4SignerError(
    'Unable to find a valid AWS SDK, please provide a valid getCredentials function to AwsSigv4Signer options.'
  )
}

const awsDefaultCredentialsProvider = () =>
  new Promise((resolve, reject) => {
    getAwsSDKCredentialsProvider()
      .then(provider => {
        provider().then(resolve).catch(reject)
      })
      .catch(err => {
        reject(err)
      })
  })

function AwsSigv4Signer(opts = {}) {
  const credentialsState = {
    credentials: null
  }
  if (!opts.region) {
    throw new AwsSigv4SignerError('Region cannot be empty')
  }
  if (!opts.service) {
    opts.service = 'es'
  }
  if (typeof opts.getCredentials !== 'function') {
    opts.getCredentials = awsDefaultCredentialsProvider
  }

  function buildSignedRequestObject(request = {}) {
    request.service = opts.service
    request.region = opts.region
    request.headers = request.headers || {}
    request.headers['host'] = request.hostname
    const signed = aws4.sign(request, credentialsState.credentials)
    signed.headers['x-amz-content-sha256'] = crypto
      .createHash('sha256')
      .update(request.body || '', 'utf8')
      .digest('hex')
    return signed
  }

  class AwsSigv4SignerConnection extends Connection {
    buildRequestObject(params) {
      const request = super.buildRequestObject(params)
      return buildSignedRequestObject(request)
    }
  }

  class AwsSigv4SignerTransport extends Transport {
    request(params, options = {}, callback = undefined) {
      // options is optional so if options is a function, it's the callback.
      if (typeof options === 'function') {
        callback = options
        options = {}
      }
      const currentCredentials = credentialsState.credentials
      /**
       * For AWS SDK V3
       * Make sure token will expire no earlier than `expiryBufferMs` milliseconds in the future.
       */
      const expiryBufferMs = toMs(options.requestTimeout || this.requestTimeout)
      let expired = false
      if (!currentCredentials) {
        // Credentials haven't been acquired yet.
        expired = true
      }
      // AWS SDK V2, needsRefresh should be available.
      else if (typeof currentCredentials.needsRefresh === 'function') {
        expired = currentCredentials.needsRefresh()
      }
      // AWS SDK V2, alternative to needsRefresh.
      else if (currentCredentials.expired === true) {
        expired = true
      }
      // AWS SDK V2, alternative to needsRefresh and expired.
      else if (
        currentCredentials.expireTime &&
        currentCredentials.expireTime < new Date()
      ) {
        expired = true
      }
      // AWS SDK V3, Credentials.expiration is a Date object
      else if (
        currentCredentials.expiration &&
        currentCredentials.expiration.getTime() - Date.now() < expiryBufferMs
      ) {
        expired = true
      }

      if (!expired) {
        if (typeof callback === 'undefined') {
          return super.request(params, options)
        }
        super.request(params, options, callback)
        return
      }

      // In AWS SDK V2 Credentials.refreshPromise should be available.
      if (
        currentCredentials &&
        typeof currentCredentials.refreshPromise === 'function'
      ) {
        if (typeof callback === 'undefined') {
          return currentCredentials.refreshPromise().then(() => {
            return super.request(params, options)
          })
        } else {
          currentCredentials
            .refreshPromise()
            .then(() => {
              super.request(params, options, callback)
            })
            .catch(callback)
          return
        }
      }

      // For AWS SDK V3 or when the client has not acquired credentials yet.
      if (typeof callback === 'undefined') {
        return opts.getCredentials().then(credentials => {
          credentialsState.credentials = credentials
          return super.request(params, options)
        })
      } else {
        opts
          .getCredentials()
          .then(credentials => {
            credentialsState.credentials = credentials
            super.request(params, options, callback)
          })
          .catch(callback)
      }
    }
  }

  return {
    Transport: AwsSigv4SignerTransport,
    Connection: AwsSigv4SignerConnection,
    buildSignedRequestObject
  }
}

module.exports = AwsSigv4Signer
```
Index and Query#
Let's create a page to index and query documents, using server functions:
index-query.tsx
```tsx
'use client'
import { indexAoss, queryAoss } from './action'

const IndexPage = () => {
  return (
    <div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
      <div>
        <form
          onSubmit={event => {
            event.preventDefault()
          }}
        >
          <textarea
            className="bg-gray-200 w-full p-5"
            rows={15}
            placeholder="document ..."
            id="document"
            name="document"
          ></textarea>
          <button
            className="px-10 py-3 rounded-sm bg-orange-400"
            onClick={async event => {
              event.preventDefault()
              // read the document text (this simple page has no title input)
              const doc = (
                document.getElementById('document') as HTMLInputElement
              ).value
              const result = await indexAoss({ doc: doc, title: '' })
            }}
          >
            Index
          </button>
        </form>
        <div className="mt-10">
          <form
            onSubmit={event => {
              event.preventDefault()
            }}
          >
            <input
              className="w-full px-5 py-3 bg-gray-200"
              placeholder="query ..."
              type="text"
              id="query"
              name="query"
            ></input>
            <button
              className="px-10 py-3 rounded-sm bg-orange-400 mt-2"
              onClick={async event => {
                event.preventDefault()
                const query = (
                  document.getElementById('query') as HTMLInputElement
                ).value
                console.log(query)
                const result = await queryAoss({ query: query })
                document.getElementById('result')!.innerText = result
              }}
            >
              Query
            </button>
          </form>
        </div>
        <div>
          <p id="result"></p>
        </div>
      </div>
    </div>
  )
}

export default IndexPage
```
Here is the server function that indexes and queries AOSS. First, create the Bedrock client (for embedding vectors) and the AOSS client:
```ts
'use server'
import {
  BedrockRuntimeClient,
  InvokeModelCommand
} from '@aws-sdk/client-bedrock-runtime'
const { defaultProvider } = require('@aws-sdk/credential-provider-node')
const { Client } = require('@opensearch-project/opensearch')
const { AwsSigv4Signer } = require('@opensearch-project/opensearch/aws')

const decoder = new TextDecoder()

const bedrockClient = new BedrockRuntimeClient({
  region: process.env.REGION
})

const aossClient = new Client({
  ...AwsSigv4Signer({
    region: process.env.REGION,
    service: 'aoss',
    getCredentials: () => {
      const credentialsProvider = defaultProvider({})
      return credentialsProvider()
    }
  }),
  node: process.env.AOSS_URL
})
```
Create a function that returns an embedding vector:
```ts
const createEmbedVector = async ({ doc }: { doc: string }) => {
  const response = await bedrockClient.send(
    new InvokeModelCommand({
      modelId: 'amazon.titan-embed-text-v1',
      contentType: 'application/json',
      accept: 'application/json',
      body: JSON.stringify({
        inputText: doc
      })
    })
  )
  const vec = JSON.parse(decoder.decode(response.body))['embedding']
  return vec
}
```
Create functions to query and index
```ts
export const queryAoss = async ({ query }: { query: string }) => {
  const vec = await createEmbedVector({ doc: query })
  const body = {
    size: 2,
    query: {
      knn: {
        vector_field: {
          vector: vec,
          k: 2
        }
      }
    }
  }
  var response = await aossClient.search({
    index: 'demo',
    body: body
  })
  let result = ''
  for (let hit of response.body.hits.hits) {
    result += hit._source.text
  }
  console.log(result)
  return result
}

export const indexAoss = async ({
  doc,
  title
}: {
  doc: string
  title: string
}) => {
  // no chunk split
  const vec = await createEmbedVector({ doc: doc })
  // serverless does not support id
  const body = {
    vector_field: vec,
    text: doc,
    title: title
  }
  var response = await aossClient.index({
    index: 'demo',
    body: body
  })
  console.log(response)
}
```
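One thing the walkthrough does not show is creating the demo index itself. A one-time setup sketch, assuming a vector index named demo; the field names vector_field, text, and title match the functions above, and 1536 is the amazon.titan-embed-text-v1 embedding size:

```ts
// hypothetical one-time index creation for the k-NN searches above
await aossClient.indices.create({
  index: 'demo',
  body: {
    settings: { 'index.knn': true },
    mappings: {
      properties: {
        vector_field: {
          type: 'knn_vector',
          dimension: 1536,
          method: { name: 'hnsw', engine: 'faiss' }
        },
        text: { type: 'text' },
        title: { type: 'text' }
      }
    }
  }
})
```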
Simple RAG#
- Use the Vercel AI SDK for handling streaming messages
- Query AOSS to get context
- Create prompt with context
Here is a simple frontend page:
rag.tsx
```tsx
'use client'
import { Message } from 'ai'
import { useChat } from 'ai/react'

export default function Chat() {
  const { messages, input, handleInputChange, handleSubmit } = useChat({
    api: './api/rag'
  })

  // Generate a map of message role to text color
  const roleToColorMap: Record<Message['role'], string> = {
    system: 'red',
    user: 'black',
    function: 'blue',
    tool: 'purple',
    assistant: 'green',
    data: 'orange'
  }

  return (
    <div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
      {messages.length > 0
        ? messages.map(m => (
            <div
              key={m.id}
              className="whitespace-pre-wrap"
              style={{ color: roleToColorMap[m.role] }}
            >
              <strong>{`${m.role}: `}</strong>
              {m.content || JSON.stringify(m.function_call)}
              <br />
              <br />
            </div>
          ))
        : null}
      <form onSubmit={handleSubmit}>
        <input
          className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl"
          value={input}
          placeholder="Say something..."
          onChange={handleInputChange}
        />
      </form>
    </div>
  )
}
```
Here is /api/rag, which handles the POST request from the client:
route.js
```js
import {
  BedrockRuntime,
  InvokeModelCommand,
  InvokeModelWithResponseStreamCommand
} from '@aws-sdk/client-bedrock-runtime'
import { AWSBedrockAnthropicStream, StreamingTextResponse } from 'ai'
const { defaultProvider } = require('@aws-sdk/credential-provider-node')
const { Client } = require('@opensearch-project/opensearch')
const { AwsSigv4Signer } = require('@opensearch-project/opensearch/aws')

// IMPORTANT! Set the runtime to edge
// export const runtime = "edge";

const decoder = new TextDecoder()

const bedrockClient = new BedrockRuntime({
  region: process.env.REGION
})

const aossClient = new Client({
  ...AwsSigv4Signer({
    region: process.env.REGION,
    service: 'aoss',
    getCredentials: () => {
      const credentialsProvider = defaultProvider()
      return credentialsProvider()
    }
  }),
  node: process.env.AOSS_URL
})

const createEmbedVector = async doc => {
  const response = await bedrockClient.send(
    new InvokeModelCommand({
      modelId: 'amazon.titan-embed-text-v1',
      contentType: 'application/json',
      accept: 'application/json',
      body: JSON.stringify({
        inputText: doc
      })
    })
  )
  const vec = JSON.parse(decoder.decode(response.body))['embedding']
  return vec
}

const queryAoss = async query => {
  const vec = await createEmbedVector(query)
  const body = {
    size: 2,
    query: {
      knn: {
        vector_field: {
          vector: vec,
          k: 2
        }
      }
    }
  }
  var response = await aossClient.search({
    index: 'demo',
    body: body
  })
  let result = ''
  for (let hit of response.body.hits.hits) {
    result += hit._source.text
  }
  return result
}

export async function POST(req) {
  // Extract the messages from the body of the request
  const { messages } = await req.json()

  // the question is the last message
  const question = messages.pop().content

  // query opensearch to get the context
  let context = ''
  try {
    context = await queryAoss(question)
  } catch (error) {
    console.log(error)
  }

  const prompt = `\n\nHuman: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. don't include harmful content \n\n ${context} \n\nHuman: ${question} \n\nAssistant:`

  // Ask Claude for a streaming chat completion given the prompt
  const bedrockResponse = await bedrockClient.send(
    new InvokeModelWithResponseStreamCommand({
      modelId: 'anthropic.claude-v2',
      contentType: 'application/json',
      accept: 'application/json',
      body: JSON.stringify({
        prompt: prompt,
        max_tokens_to_sample: 2048
      })
    })
  )

  // Convert the response into a friendly text-stream
  const stream = AWSBedrockAnthropicStream(bedrockResponse)

  // Respond with the stream
  return new StreamingTextResponse(stream)
}
```
Here is the simplest prompt with context:
```js
const prompt = `\n\nHuman: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. don't include harmful content \n\n ${context} \n\nHuman: ${question} \n\nAssistant:`
```
Docker#
To build and deploy with App Runner, take note of next.config.js:
```js
/** @type {import('next').NextConfig} */
const nextConfig = {
  output: 'standalone'
}

module.exports = nextConfig
```
Then prepare a multi-stage Dockerfile to optimize the image size:
Dockerfile
```dockerfile
FROM node:18-alpine AS base

# Install dependencies only when needed
FROM base AS deps
# Check https://github.com/nodejs/docker-node/tree/b4117f9333da4138b03a546ec926ef50a31506c3#nodealpine to understand why libc6-compat might be needed.
RUN apk add --no-cache libc6-compat
WORKDIR /app

# Install dependencies based on the preferred package manager
COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* ./
RUN \
  if [ -f yarn.lock ]; then yarn --frozen-lockfile; \
  elif [ -f package-lock.json ]; then npm ci; \
  elif [ -f pnpm-lock.yaml ]; then corepack enable pnpm && pnpm i --frozen-lockfile; \
  else echo "Lockfile not found." && exit 1; \
  fi

# Rebuild the source code only when needed
FROM base AS builder
WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
COPY . .

# Next.js collects completely anonymous telemetry data about general usage.
# Learn more here: https://nextjs.org/telemetry
# Uncomment the following line in case you want to disable telemetry during the build.
# ENV NEXT_TELEMETRY_DISABLED 1

RUN \
  if [ -f yarn.lock ]; then yarn run build; \
  elif [ -f package-lock.json ]; then npm run build; \
  elif [ -f pnpm-lock.yaml ]; then corepack enable pnpm && pnpm run build; \
  else echo "Lockfile not found." && exit 1; \
  fi

# Production image, copy all the files and run next
FROM base AS runner
WORKDIR /app

ENV NODE_ENV production
# Uncomment the following line in case you want to disable telemetry during runtime.
# ENV NEXT_TELEMETRY_DISABLED 1

RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs

COPY --from=builder /app/public ./public

# Set the correct permission for prerender cache
RUN mkdir .next
RUN chown nextjs:nodejs .next

# Automatically leverage output traces to reduce image size
# https://nextjs.org/docs/advanced-features/output-file-tracing
COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static

USER nextjs

EXPOSE 3000

ENV PORT 3000
# bind to all interfaces
ENV HOSTNAME "0.0.0.0"

# server.js is created by next build from the standalone output
# https://nextjs.org/docs/pages/api-reference/next-config-js/output
CMD ["node", "server.js"]
```
And the .dockerignore file:
```
Dockerfile
.dockerignore
node_modules
npm-debug.log
README.md
.next
.git
```
Docker Command#
Stop all running containers and remove images
```bash
docker stop $(docker ps -a -q)
docker rmi -f $(docker images -aq)
```
Delete existing images
```bash
docker rmi -f $(docker images -aq)
```
Build and tag image
```bash
docker build --tag entest .
```
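To test the image locally before pushing (assuming the entest tag from above):

```bash
docker run -d -p 3000:3000 entest
```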
Stop all processes running on port 3000:
```bash
sudo kill -9 $(lsof -i:3000 -t)
```
Here is the build script:
build.py
```python
import os
import boto3

# parameters
deploy = 1
REGION = "us-east-1"
ACCOUNT = os.environ["ACCOUNT_ID"]

# delete all docker images
os.system("sudo docker system prune -a")

# build next-bedrock image
os.system("sudo docker build -t next-bedrock . ")

# aws ecr login
os.system(
    f"aws ecr get-login-password --region {REGION} | sudo docker login --username AWS --password-stdin {ACCOUNT}.dkr.ecr.{REGION}.amazonaws.com"
)

# get image id
IMAGE_ID = os.popen("sudo docker images -q next-bedrock:latest").read()

# tag next-bedrock image
os.system(
    f"sudo docker tag {IMAGE_ID.strip()} {ACCOUNT}.dkr.ecr.{REGION}.amazonaws.com/next-bedrock:latest"
)

# create ecr repository
os.system(
    f"aws ecr create-repository --registry-id {ACCOUNT} --repository-name next-bedrock"
)

# push image to ecr
os.system(
    f"sudo docker push {ACCOUNT}.dkr.ecr.{REGION}.amazonaws.com/next-bedrock:latest"
)

# run locally to test
# os.system(f"sudo docker run -d -p 3000:3000 next-bedrock:latest")

# apprunner deploy
if deploy == 1:
    apprunner = boto3.client("apprunner")
    apprunner.start_deployment(
        ServiceArn=f"arn:aws:apprunner:{REGION}:{ACCOUNT}:service/NextBedrockService/xxx"
    )
```
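The script reads the account ID from the environment, and the App Runner service ARN placeholder (xxx) must be replaced with your real service ID before the deploy step. A typical invocation:

```bash
export ACCOUNT_ID=123456789012
python3 build.py
```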
AppRunner#
Let's create a stack to deploy the app on App Runner:
- a build role to pull the ECR image
- an instance (task) role to invoke Bedrock models
```ts
import { Stack, StackProps, aws_apprunner, aws_iam } from 'aws-cdk-lib'
import { Effect } from 'aws-cdk-lib/aws-iam'
import { Construct } from 'constructs'

interface AppRunnerProps extends StackProps {
  ecr: string
  bucket: string
  aossCollectionArn: string
}

export class AppRunnerStack extends Stack {
  constructor(scope: Construct, id: string, props: AppRunnerProps) {
    super(scope, id, props)

    const buildRole = new aws_iam.Role(this, 'RoleForAppRunnerPullEcrBedrock', {
      assumedBy: new aws_iam.ServicePrincipal('build.apprunner.amazonaws.com'),
      roleName: 'RoleForAppRunnerPullEcrBedrock'
    })

    buildRole.addToPolicy(
      new aws_iam.PolicyStatement({
        effect: Effect.ALLOW,
        resources: ['*'],
        actions: ['ecr:*']
      })
    )

    const instanceRole = new aws_iam.Role(
      this,
      'InstanceRoleForApprunerBedrock',
      {
        assumedBy: new aws_iam.ServicePrincipal(
          'tasks.apprunner.amazonaws.com'
        ),
        roleName: 'InstanceRoleForApprunnerBedrock'
      }
    )

    instanceRole.addToPolicy(
      new aws_iam.PolicyStatement({
        effect: Effect.ALLOW,
        resources: [
          'arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-v2',
          'arn:aws:bedrock:us-east-1::foundation-model/stability.stable-diffusion-xl-v1',
          'arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-text-v1'
        ],
        actions: [
          'bedrock:InvokeModel',
          'bedrock:InvokeModelWithResponseStream'
        ]
      })
    )

    instanceRole.addToPolicy(
      new aws_iam.PolicyStatement({
        effect: Effect.ALLOW,
        resources: [`arn:aws:s3:::${props.bucket}/*`],
        actions: ['s3:PutObject', 's3:GetObject']
      })
    )

    instanceRole.addToPolicy(
      new aws_iam.PolicyStatement({
        effect: Effect.ALLOW,
        resources: [props.aossCollectionArn],
        actions: ['aoss:APIAccessAll']
      })
    )

    const autoscaling = new aws_apprunner.CfnAutoScalingConfiguration(
      this,
      'AutoScalingForGoApp',
      {
        autoScalingConfigurationName: 'AutoScalingForGoApp',
        // min number of instances
        minSize: 1,
        // max number of instances
        maxSize: 10,
        // max concurrent requests per instance
        maxConcurrency: 100
      }
    )

    const apprunner = new aws_apprunner.CfnService(this, 'NextBedrockService', {
      serviceName: 'NextBedrockService',
      sourceConfiguration: {
        authenticationConfiguration: {
          accessRoleArn: buildRole.roleArn
        },
        autoDeploymentsEnabled: false,
        imageRepository: {
          imageIdentifier: props.ecr,
          imageRepositoryType: 'ECR',
          imageConfiguration: {
            port: '3000',
            runtimeEnvironmentVariables: [
              {
                name: 'BUCKET',
                value: props.bucket
              },
              {
                name: 'HOSTNAME',
                value: '0.0.0.0'
              },
              {
                name: 'PORT',
                value: '3000'
              }
            ]
            // startCommand: "",
          }
        }
      },
      instanceConfiguration: {
        cpu: '1 vCPU',
        memory: '2 GB',
        instanceRoleArn: instanceRole.roleArn
      },
      observabilityConfiguration: {
        observabilityEnabled: false
      },
      autoScalingConfigurationArn: autoscaling.ref
    })

    apprunner.addDependency(autoscaling)
  }
}
```
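With the stack wired into the CDK app, deploy as usual. A sketch assuming the stack is instantiated as AppRunnerStack in bin/cdk.ts:

```bash
cd cdk
cdk deploy AppRunnerStack
```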
FrontEnd Chat#
First, let's create a chat page that submits on the Enter key:
```tsx
'use client'
import { Message } from 'ai'
import { useChat } from 'ai/react'

export default function Chat() {
  const { messages, input, handleInputChange, handleSubmit } = useChat({
    api: './api/chat'
  })

  // Generate a map of message role to text color
  const roleToColorMap: Record<Message['role'], string> = {
    system: 'red',
    user: 'black',
    function: 'blue',
    tool: 'purple',
    assistant: 'green',
    data: 'orange'
  }

  return (
    <div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
      {messages.length > 0
        ? messages.map(m => (
            <div
              key={m.id}
              className="whitespace-pre-wrap"
              style={{ color: roleToColorMap[m.role] }}
            >
              <strong>{`${m.role}: `}</strong>
              {m.content || JSON.stringify(m.function_call)}
              <br />
              <br />
            </div>
          ))
        : null}
      <form onSubmit={handleSubmit}>
        <input
          className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl"
          value={input}
          placeholder="Say something..."
          onChange={handleInputChange}
        />
      </form>
    </div>
  )
}
```
FrontEnd Image#
Second, let's create an image generation page:
```tsx
'use client'
import { useEffect, useState } from 'react'
import { genImage } from './actions'

const SendIcon = ({ className }: { className?: string }) => {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 16 16"
      fill="none"
      className={className}
      strokeWidth="2"
    >
      <path
        d="M.5 1.163A1 1 0 0 1 1.97.28l12.868 6.837a1 1 0 0 1 0 1.766L1.969 15.72A1 1 0 0 1 .5 14.836V10.33a1 1 0 0 1 .816-.983L8.5 8 1.316 6.653A1 1 0 0 1 .5 5.67V1.163Z"
        fill="currentColor"
      ></path>
    </svg>
  )
}

const HomePage = () => {
  const [url, setUrl] = useState<string>('')

  useEffect(() => {}, [url])

  return (
    <main>
      <div className="max-w-3xl mx-auto">
        <form className="mt-10 px-10">
          <div className="relative">
            <textarea
              rows={2}
              id="prompt"
              name="prompt"
              className="w-[100%] bg-gray-300 px-5 py-5 rounded-sm"
              onKeyDown={async event => {
                if (event.key === 'Enter' && event.shiftKey === false) {
                  event.preventDefault()
                  setUrl('')
                  document.getElementById('modal')!.style.display = 'block'
                  const url = await genImage({
                    prompt: (
                      document.getElementById('prompt') as HTMLInputElement
                    ).value
                  })
                  setUrl(url)
                  document.getElementById('modal')!.style.display = 'none'
                }
              }}
            ></textarea>
            <button
              className="absolute top-[50%] translate-y-[-50%] right-1 flex items-center justify-center rounded-md w-10 h-16 bg-green-500 hover:bg-green-600"
              onClick={async event => {
                event.preventDefault()
                setUrl('')
                document.getElementById('modal')!.style.display = 'block'
                const url = await genImage({
                  prompt: (
                    document.getElementById('prompt') as HTMLInputElement
                  ).value
                })
                setUrl(url)
                document.getElementById('modal')!.style.display = 'none'
              }}
            >
              <SendIcon className="h-4 w-4 text-white"></SendIcon>
            </button>
          </div>
        </form>
        <div className="mt-10 px-10">
          <img src={url}></img>
        </div>
        <div
          id="modal"
          className="fixed top-0 left-0 bg-slate-400 min-h-screen w-full opacity-60"
          hidden
        >
          <div className="min-h-screen flex justify-center items-center">
            <h1>Wait a few seconds!</h1>
          </div>
        </div>
      </div>
    </main>
  )
}

export default HomePage
```
The image generation server function is the same genImage server action shown earlier in the Image Function section. It:
- calls the Bedrock Stability model
- saves the image to S3
- returns a signed URL so the page can display the image
Upload Document#
Let's create a page for uploading documents to S3, which then triggers a Lambda function to index the documents into OpenSearch:
- create a Cognito identity pool so guest users can upload to S3
- create an S3 client
- build the frontend

First, let's create a Cognito identity pool for guest users with the following policy:
{"Version": "2012-10-17","Statement": [{"Effect": "Allow","Action": ["cognito-identity:GetCredentialsForIdentity"],"Resource": ["*"]},{"Effect": "Allow","Action": ["s3:PutObject", "s3:GetObject"],"Resource": ["arn:aws:s3:::$BUCKET/documents/*"]}]}
Second, let's create the S3 client for uploading:
```ts
import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3'
import { fromCognitoIdentityPool } from '@aws-sdk/credential-providers'

const uploadToS3 = async (file: File) => {
  console.log('upload to s3 ')
  // s3 client with guest credentials from the identity pool
  const s3Client = new S3Client({
    region: 'us-east-1',
    credentials: fromCognitoIdentityPool({
      clientConfig: { region: 'us-east-1' },
      identityPoolId: 'us-east-1:887c4756-e061-4fb0-a44a-cc9a6a59d96d',
      logins: {
        // [process.env.COGNITO_POOL_ID ?? ""]: idToken,
      }
    })
  })
  // command to upload to s3
  const command = new PutObjectCommand({
    Bucket: 'cdk-entest-videos',
    Key: `documents/${file.name}`,
    Body: file
  })
  // upload file to s3
  try {
    document.getElementById('modal')!.style.display = 'block'
    const res = await s3Client.send(command)
    console.log(res)
  } catch (error) {
    console.log('error upload to s3 ', error)
  }
  document.getElementById('modal')!.style.display = 'none'
}
```
Finally, create a simple frontend:
upload-s3.tsx
```tsx
'use client'
import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3'
import { fromCognitoIdentityPool } from '@aws-sdk/credential-providers'

const uploadToS3 = async (file: File) => {
  console.log('upload to s3 ')
  // s3 client
  const s3Client = new S3Client({
    region: 'us-east-1',
    credentials: fromCognitoIdentityPool({
      clientConfig: { region: 'us-east-1' },
      identityPoolId: 'us-east-1:111222333444',
      logins: {
        // [process.env.COGNITO_POOL_ID ?? ""]: idToken,
      }
    })
  })
  // command to upload to s3
  const command = new PutObjectCommand({
    Bucket: 'bucket-name',
    Key: `documents/${file.name}`,
    Body: file
  })
  // upload file to s3
  try {
    document.getElementById('modal')!.style.display = 'block'
    const res = await s3Client.send(command)
    console.log(res)
  } catch (error) {
    console.log('error upload to s3 ', error)
  }
  document.getElementById('modal')!.style.display = 'none'
}

const IndexPage = () => {
  const submit = async (data: FormData) => {
    const file = data.get('upload') as File
    await uploadToS3(file)
  }

  return (
    <div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
      <div>
        <form className="mb-5" action={submit}>
          <div className="flex flex-row w-full bg-gray-200 justify-center items-center py-3 px-3">
            <input
              type="file"
              id="upload"
              name="upload"
              className="w-full cursor-pointer"
            ></input>
            <button
              id="upload-button"
              className="bg-orange-400 px-10 py-3 rounded-sm"
              onClick={event => {
                console.log('upload file ...')
              }}
            >
              Upload
            </button>
          </div>
        </form>
        <div>
          <p id="result"></p>
        </div>
      </div>
      <div
        id="modal"
        className="fixed top-0 left-0 bg-slate-400 min-h-screen w-full opacity-60"
        hidden
      >
        <div className="min-h-screen flex justify-center items-center">
          <h1>Wait a few seconds!</h1>
        </div>
      </div>
    </div>
  )
}

export default IndexPage
```
OpenSearch#
Let's create a page for querying and indexing OpenSearch Serverless (AOSS). We need an AOSS client and a Bedrock client for creating embedding vectors:
```ts
'use server'
import {
  BedrockRuntimeClient,
  InvokeModelCommand
} from '@aws-sdk/client-bedrock-runtime'
import { PutObjectCommand, S3Client } from '@aws-sdk/client-s3'
const { defaultProvider } = require('@aws-sdk/credential-provider-node')
const { Client } = require('@opensearch-project/opensearch')
const { AwsSigv4Signer } = require('@opensearch-project/opensearch/aws')
import { writeFile } from 'fs/promises'
import { join } from 'path'

const decoder = new TextDecoder()

const bedrockClient = new BedrockRuntimeClient({
  region: process.env.REGION
})

const s3Client = new S3Client({
  region: process.env.REGION
})

const aossClient = new Client({
  ...AwsSigv4Signer({
    region: process.env.REGION,
    service: 'aoss',
    getCredentials: () => {
      const credentialsProvider = defaultProvider({})
      return credentialsProvider()
    }
  }),
  node: process.env.AOSS_URL
})
```
Create an embedding vector using the Bedrock Titan model:

```ts
const createEmbedVector = async ({ doc }: { doc: string }) => {
  const response = await bedrockClient.send(
    new InvokeModelCommand({
      modelId: 'amazon.titan-embed-text-v1',
      contentType: 'application/json',
      accept: 'application/json',
      body: JSON.stringify({
        inputText: doc
      })
    })
  )
  const vec = JSON.parse(decoder.decode(response.body))['embedding']
  return vec
}
```
Query AOSS
```ts
export const queryAoss = async ({ query }: { query: string }) => {
  const vec = await createEmbedVector({ doc: query })
  const body = {
    size: 2,
    query: {
      knn: {
        vector_field: {
          vector: vec,
          k: 2
        }
      }
    }
  }
  var response = await aossClient.search({
    index: 'demo',
    body: body
  })
  const docs = response.body.hits.hits
  console.log(docs)
  return docs
}
```
Index AOSS
```ts
export const indexAoss = async ({
  doc,
  title
}: {
  doc: string
  title: string
}) => {
  // no chunk split
  const vec = await createEmbedVector({ doc: doc })
  // serverless does not support id
  const body = {
    vector_field: vec,
    text: doc,
    title: title
  }
  var response = await aossClient.index({
    index: 'demo',
    body: body
  })
  console.log(response)
}
```
Finally, RAG:

```ts
const qaRag = async ({ question }: { question: string }) => {
  // query opensearch to get the context
  const context = await queryAoss({ query: question })
  // build the prompt
  const prompt = `\n\nHuman: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. don't include harmful content \n\n ${context} \n\nHuman: ${question} \n\nAssistant:`
  // prompt bedrock anthropic
  const response = await bedrockClient.send(
    new InvokeModelCommand({
      modelId: 'anthropic.claude-v2',
      contentType: 'application/json',
      accept: 'application/json',
      body: JSON.stringify({
        prompt: prompt,
        max_tokens_to_sample: 2048
      })
    })
  )
  console.log(decoder.decode(response.body))
}
```
The full file (aoss.ts) collects the functions above and additionally includes an uploadFile server action that saves the uploaded document to /tmp and to S3; it ends with a few commented-out smoke tests that index and query a sample Amazon Bedrock description:

```ts
export const uploadFile = async (data: FormData) => {
  console.log('file: ', data.get('upload'))
  const file: File | null = data.get('upload') as unknown as File
  if (!file) {
    throw new Error('No file uploaded')
  }
  // file buffer
  const bytes = await file.arrayBuffer()
  const buffer = Buffer.from(bytes)
  // write to local
  const path = join('/tmp', file.name)
  await writeFile(path, buffer)
  console.log(`open ${path} to see the uploaded file`)
  // write to s3
  const command = new PutObjectCommand({
    Bucket: process.env.BUCKET,
    Key: `documents/${file.name}`,
    Body: Buffer.from(bytes)
  })
  // upload file to s3
  try {
    const res = await s3Client.send(command)
    console.log(res)
  } catch (error) {
    console.log('error upload to s3 ', error)
  }
  return {
    status: 'OK',
    message: 'message sent OK'
  }
}
```
And here is the frontend:
aoss.tsx
```tsx
'use client'
import { useState } from 'react'
import { indexAoss, queryAoss } from './action'

const IndexPage = () => {
  const [docs, setDocs] = useState<any[]>([])

  return (
    <div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
      <div>
        <form
          className="mb-3"
          onSubmit={event => {
            event.preventDefault()
          }}
        >
          <input
            type="text"
            id="title"
            name="title"
            placeholder="title"
            className="w-full bg-gray-200 p-5 mb-3"
          ></input>
          <textarea
            className="bg-gray-200 w-full p-5"
            rows={15}
            placeholder="document ..."
            id="document"
            name="document"
          ></textarea>
          <button
            className="px-10 py-3 rounded-sm bg-orange-400"
            onClick={async event => {
              event.preventDefault()
              const title = (
                document.getElementById('title') as HTMLInputElement
              ).value
              const doc = (
                document.getElementById('document') as HTMLInputElement
              ).value
              console.log(title, doc)
              document.getElementById('modal')!.style.display = 'block'
              await indexAoss({ doc: doc, title: title })
              document.getElementById('modal')!.style.display = 'none'
            }}
          >
            Index
          </button>
        </form>
        <div>
          <form
            onSubmit={event => {
              event.preventDefault()
            }}
          >
            <input
              className="w-full px-5 py-3 bg-gray-200"
              placeholder="query ..."
              type="text"
              id="query"
              name="query"
            ></input>
            <button
              className="px-10 py-3 rounded-sm bg-orange-400 mt-2"
              onClick={async event => {
                event.preventDefault()
                const query = (
                  document.getElementById('query') as HTMLInputElement
                ).value
                console.log(query)
                const docs = await queryAoss({ query: query })
                setDocs(docs)
              }}
            >
              Query
            </button>
          </form>
        </div>
        <div>
          {docs.map(doc => (
            <div key={doc._id} className="mb-3">
              <h3 className="font-bold">{doc._source.title}</h3>
              <p>{doc._source.text}</p>
              <hr className="bg-blue-500 border-1 border-blue-500"></hr>
            </div>
          ))}
        </div>
      </div>
      <div
        id="modal"
        className="fixed top-0 left-0 bg-slate-400 min-h-screen w-full opacity-90"
        hidden
      >
        <div className="min-h-screen flex justify-center items-center">
          <h1>Wait a few seconds!</h1>
        </div>
      </div>
    </div>
  )
}

export default IndexPage
```
The RAG frontend is the same rag.tsx page shown earlier in the Simple RAG section.
Lambda OASS#
Let's create a Lambda function that is triggered when a PDF file is uploaded. The function indexes the document into AOSS. Here is the logic:
```python
# haimtran 07 DEC 2022
# opensearch serverless
from opensearchpy import OpenSearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
import boto3
import json
import os
from datetime import datetime

# index name
INDEX = "demo"

# chunk size
CHUNK_SIZE = 1000

# bucket
if "BUCKET" in os.environ:
    pass
else:
    os.environ["BUCKET"] = "BUCKET-NAME"

# opensearch domain
if "OPENSEARCH_DOMAIN" in os.environ:
    pass
else:
    os.environ["OPENSEARCH_DOMAIN"] = "domain.us-east-1.aoss.amazonaws.com"
    os.environ["REGION"] = "us-east-1"

# bedrock client
bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1")

# s3 client
s3_client = boto3.client("s3")

# host and opensearch client
host = os.environ["OPENSEARCH_DOMAIN"]
client = boto3.client("opensearchserverless")
region = os.environ["REGION"]
credentials = boto3.Session().get_credentials()

# auth
awsauth = AWS4Auth(
    credentials.access_key,
    credentials.secret_key,
    region,
    "aoss",
    session_token=credentials.token,
)

# opensearch client
aoss_client = OpenSearch(
    hosts=[{"host": host, "port": 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
    timeout=300,
)


def get_embedded_vector(query: str):
    """convert text to embedding vector using bedrock"""
    # request body
    body = json.dumps({"inputText": query})
    # call bedrock titan
    response = bedrock_client.invoke_model(
        body=body,
        modelId="amazon.titan-embed-text-v1",
        accept="application/json",
        contentType="application/json",
    )
    # get embed vector
    vector = json.loads(response["body"].read())["embedding"]
    return vector


def load_pdf_to_vectors(key: str):
    """load pdf and convert its chunks to vectors"""
    # filename
    filename = (
        key.split("/").pop().split(".")[0] + "-" + datetime.now().isoformat() + ".pdf"
    )
    # download file from s3
    s3_client.download_file(os.environ["BUCKET"], key, f"/tmp/{filename}")
    # read pdf file
    loader = PyPDFLoader(f"/tmp/{filename}")
    pages = loader.load_and_split()
    # chunk with fixed size
    text_splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=0)
    docs = text_splitter.split_documents(documents=pages)
    # embedding
    vectors = [get_embedded_vector(doc.page_content) for doc in docs]
    return vectors, docs


def index_vectors_to_aoss(vectors, docs, filename):
    """index data to opensearch"""
    # bulk indexing
    data = ""
    for index, doc in enumerate(docs):
        data += json.dumps({"index": {"_index": INDEX}}) + "\n"
        data += (
            json.dumps(
                {
                    "vector_field": vectors[index],
                    "text": doc.page_content,
                    "title": f"{filename} chunk {index}",
                }
            )
            + "\n"
        )
    # bulk index
    aoss_client.bulk(data)
    return data


def handler(event, context):
    """index uploaded pdf documents"""
    # get filename from event
    for record in event["Records"]:
        # get key object
        key = record["s3"]["object"]["key"]
        print(key)
        # filename
        filename = key.split("/").pop()
        # load pdf to vectors
        vectors, docs = load_pdf_to_vectors(key=key)
        # index vectors to aoss
        data = index_vectors_to_aoss(vectors=vectors, docs=docs, filename=filename)
    return {
        "statusCode": 200,
        "headers": {
            "Access-Control-Allow-Headers": "*",
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Methods": "OPTIONS,POST,GET",
        },
        "body": json.dumps({}),
    }


if __name__ == "__main__":
    handler(
        event={"Records": [{"s3": {"object": {"key": "book/datasheet.pdf"}}}]},
        context=None,
    )
```
Let's deploy it using CDK and a container image. Here are requirements.txt and the Dockerfile:
```
boto3==1.34.55
opensearch-py==2.4.2
langchain==0.1.10
pypdf==4.1.0
requests-aws4auth==1.2.3
```
Dockerfile
```dockerfile
FROM public.ecr.aws/lambda/python:3.9

# create code dir inside container
RUN mkdir ${LAMBDA_TASK_ROOT}/source

# copy requirements to container
COPY "requirements.txt" ${LAMBDA_TASK_ROOT}/source

# copy handler function to container
COPY ./index.py ${LAMBDA_TASK_ROOT}

# install dependencies for the runtime environment
RUN pip3 install -r ./source/requirements.txt --target "${LAMBDA_TASK_ROOT}"
RUN python3 -m pip install boto3 --upgrade

# set the CMD to your handler
CMD [ "index.handler" ]
```
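Optionally, the image can be smoke-tested locally via the runtime interface emulator built into the AWS Lambda base image (the sample key is an assumption):

```bash
docker build -t lambda-index-aoss .
docker run -p 9000:8080 lambda-index-aoss
# in another terminal, invoke the handler with a fake S3 event
curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
  -d '{"Records":[{"s3":{"object":{"key":"book/datasheet.pdf"}}}]}'
```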
Finally, here is the CDK stack. To grant the Lambda function permission to index and query AOSS we need to:
- set up an IAM role for the Lambda with permission to call aoss:APIAccessAll
- add that IAM role as a principal in the AOSS data access policy
lambda-stack.ts
```ts
import { Duration, Stack, StackProps, aws_iam, aws_lambda } from 'aws-cdk-lib'
import { Effect } from 'aws-cdk-lib/aws-iam'
import { Construct } from 'constructs'
import * as path from 'path'

interface LambdaAossProps extends StackProps {
  opensearchDomain: string
  aossCollectionArn: string
  bucketName: string
}

export class LambdaAossStack extends Stack {
  constructor(scope: Construct, id: string, props: LambdaAossProps) {
    super(scope, id, props)

    // role for lambda to read opensearch
    const role = new aws_iam.Role(this, 'RoleForLambdaIndexAossBedrock', {
      roleName: 'RoleForLambdaIndexAossBedrock',
      assumedBy: new aws_iam.ServicePrincipal('lambda.amazonaws.com')
    })

    role.addToPolicy(
      new aws_iam.PolicyStatement({
        effect: Effect.ALLOW,
        resources: [`arn:aws:s3:::${props.bucketName}/*`],
        actions: ['s3:GetObject']
      })
    )

    role.addToPolicy(
      new aws_iam.PolicyStatement({
        effect: Effect.ALLOW,
        resources: [
          `arn:aws:bedrock:${this.region}::foundation-model/anthropic.claude-v2`,
          `arn:aws:bedrock:${this.region}::foundation-model/stability.stable-diffusion-xl-v1`,
          `arn:aws:bedrock:${this.region}::foundation-model/amazon.titan-embed-text-v1`
        ],
        actions: [
          'bedrock:InvokeModel',
          'bedrock:InvokeModelWithResponseStream'
        ]
      })
    )

    role.addToPolicy(
      new aws_iam.PolicyStatement({
        effect: Effect.ALLOW,
        resources: [props.aossCollectionArn],
        actions: ['aoss:APIAccessAll']
      })
    )

    // lambda function to index and query opensearch
    const fn = new aws_lambda.Function(this, 'LamdaQueryOpenSearch', {
      functionName: 'LambdaIndexAossBedrock',
      memorySize: 2048,
      timeout: Duration.seconds(300),
      code: aws_lambda.EcrImageCode.fromAssetImage(
        path.join(__dirname, './../lambda/lambda-index-aoss/')
      ),
      handler: aws_lambda.Handler.FROM_IMAGE,
      runtime: aws_lambda.Runtime.FROM_IMAGE,
      environment: {
        OPENSEARCH_DOMAIN: props.opensearchDomain,
        PYTHONPATH: '/var/task/package',
        REGION: this.region,
        BUCKET: props.bucketName
      },
      role: role
    })
  }
}
```
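The stack above creates the function but does not wire up the S3 trigger mentioned earlier. A sketch of adding it, assuming the bucket is imported by name into the same stack and aws_s3 / aws_s3_notifications are imported from aws-cdk-lib:

```ts
// hypothetical wiring: invoke the indexing Lambda when a PDF lands under documents/
const bucket = aws_s3.Bucket.fromBucketName(
  this,
  'DocumentBucket',
  props.bucketName
)
bucket.addEventNotification(
  aws_s3.EventType.OBJECT_CREATED,
  new aws_s3_notifications.LambdaDestination(fn),
  { prefix: 'documents/', suffix: '.pdf' }
)
```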
The complete apprunner-lambda.ts simply combines the AppRunnerStack and LambdaAossStack shown above (with the shared imports at the top) into a single file.
Claude3 Sonnet#
Since the Vercel AI SDK does not support this model ID yet, let's stream the response to the client ourselves. First, create /app/api/claudesonet/route.ts (the path matches the fetch call in the frontend below):
route.ts
```ts
import {
  BedrockRuntime,
  InvokeModelWithResponseStreamCommand
} from '@aws-sdk/client-bedrock-runtime'
import { NextRequest } from 'next/server'

const decoder = new TextDecoder()
const bedrock = new BedrockRuntime({ region: 'us-east-1' })

async function* makeIterator(prompt: String) {
  const command = new InvokeModelWithResponseStreamCommand({
    modelId: 'anthropic.claude-3-sonnet-20240229-v1:0',
    contentType: 'application/json',
    accept: 'application/json',
    body: JSON.stringify({
      anthropic_version: 'bedrock-2023-05-31',
      max_tokens: 1000,
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: prompt
            }
          ]
        }
      ]
    })
  })
  try {
    console.log('call bedrock ...')
    const response = await bedrock.send(command)
    if (response.body) {
      for await (const chunk of response.body) {
        if (chunk.chunk) {
          try {
            const json = JSON.parse(decoder.decode(chunk.chunk.bytes))
            yield json.delta.text
          } catch (error) {
            console.log(error)
            yield ' '
          }
        }
      }
    }
  } catch (error) {
    console.log(error)
  }
}

function iteratorToStream(iterator: any) {
  return new ReadableStream({
    async pull(controller) {
      const { value, done } = await iterator.next()
      if (done) {
        controller.close()
      } else {
        controller.enqueue(value)
      }
    }
  })
}

export async function GET() {
  const iterator = makeIterator('how to cook chicken soup?')
  const stream = iteratorToStream(iterator)
  return new Response(stream)
}

export async function POST(request: NextRequest) {
  const res = await request.json()
  const iterator = makeIterator(res.prompt)
  const stream = iteratorToStream(iterator)
  return new Response(stream)
}
```
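A quick way to exercise the route from the terminal (assuming the dev server is running on port 3000):

```bash
# -N disables buffering so the chunks print as they arrive
curl -N -X POST http://localhost:3000/api/claudesonet \
  -H 'Content-Type: application/json' \
  -d '{"prompt":"how to cook chicken soup?"}'
```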
Note that the stream chunks are parsed on the Node server side, which is less error prone than parsing them in the frontend client:

```ts
for await (const chunk of response.body) {
  if (chunk.chunk) {
    try {
      const json = JSON.parse(decoder.decode(chunk.chunk.bytes))
      yield json.delta.text
    } catch (error) {
      console.log(error)
      yield ' '
    }
  }
}
```
Next, create a simple frontend at /app/claude/page.tsx
page.tsx
'use client'

const ChatPage = () => {
  const callBedrock = async () => {
    const prompt = (document.getElementById('prompt') as HTMLInputElement)
      .value
    const story = document.getElementById('story-output')
    story!.innerText = ''
    try {
      // call the claude route and stream the answer into the page
      const response = await fetch('/api/claude', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ prompt: prompt })
      })
      const reader = response.body!.getReader()
      const decoder = new TextDecoder()
      while (true) {
        const { done, value } = await reader.read()
        if (done) {
          break
        }
        try {
          const json = decoder.decode(value)
          story!.innerText += json
        } catch (error) {
          console.log(error)
          // story!.innerText += "ERROR";
        }
      }
    } catch (error) {
      console.log(error)
    }
  }
  return (
    <div className="min-h-screen">
      <div className="max-w-3xl mx-auto px-10 py-10">
        <div>
          <form className="relative mb-3">
            <input
              type="text"
              className="border-2 border-blue-500 w-full p-3"
              id="prompt"
            ></input>
            <button
              type="submit"
              className="bg-orange-500 px-10 py-2.5 rounded-md cursor-pointer absolute top-0 right-0 translate-y-1 mr-2"
              onClick={event => {
                event.preventDefault()
                callBedrock()
              }}
            >
              Submit
            </button>
          </form>
        </div>
        <p id="story-output"></p>
      </div>
    </div>
  )
}

export default ChatPage
Claude3 Haiku#
Similar to Sonnet, let's write a simple Next.js server API (route handler) that invokes Bedrock and streams the response to the client chunk by chunk
import {
  BedrockRuntime,
  InvokeModelWithResponseStreamCommand
} from '@aws-sdk/client-bedrock-runtime'
import { NextRequest } from 'next/server'

const decoder = new TextDecoder()
const bedrock = new BedrockRuntime({ region: 'us-east-1' })

// invoke claude 3 haiku and yield the response chunk by chunk
async function* makeIterator(prompt: string) {
  const command = new InvokeModelWithResponseStreamCommand({
    modelId: 'anthropic.claude-3-haiku-20240307-v1:0',
    contentType: 'application/json',
    accept: 'application/json',
    body: JSON.stringify({
      anthropic_version: 'bedrock-2023-05-31',
      max_tokens: 2048,
      messages: [
        {
          role: 'user',
          content: [{ type: 'text', text: prompt }]
        }
      ]
    })
  })
  try {
    console.log('call bedrock ...')
    const response = await bedrock.send(command)
    if (response.body) {
      console.log(response.body)
      for await (const chunk of response.body) {
        if (chunk.chunk) {
          try {
            const json = JSON.parse(decoder.decode(chunk.chunk.bytes))
            console.log(json)
            if (json.type == 'content_block_delta') {
              yield json.delta.text
            }
          } catch (error) {
            console.log(error)
            yield ' '
          }
        }
      }
    }
  } catch (error) {
    console.log(error)
  }
}

function iteratorToStream(iterator: any) {
  return new ReadableStream({
    async pull(controller) {
      const { value, done } = await iterator.next()
      if (done) {
        controller.close()
      } else {
        controller.enqueue(value)
      }
    }
  })
}

export async function GET() {
  const iterator = makeIterator('how to cook chicken soup?')
  const stream = iteratorToStream(iterator)
  return new Response(stream)
}

export async function POST(request: NextRequest) {
  const res = await request.json()
  console.log(res)
  const iterator = makeIterator(res.prompt)
  const stream = iteratorToStream(iterator)
  return new Response(stream)
}
Analyse Image#
- Call Claude3 Sonnet and Haiku to analyse an image
- Capture image from camera
Invoke Bedrock Claude3 in batch mode
import { NextRequest } from 'next/server'
import {
  BedrockRuntimeClient,
  InvokeModelCommand,
  InvokeModelWithResponseStreamCommand
} from '@aws-sdk/client-bedrock-runtime'

const decoder = new TextDecoder()
const bedrockClient = new BedrockRuntimeClient({ region: 'us-east-1' })

const describeImageBedrockBatch = async ({ image }: { image: string }) => {
  // command to claude 3 haiku with an image and a text prompt
  const commandHaiku = new InvokeModelCommand({
    modelId: 'anthropic.claude-3-haiku-20240307-v1:0',
    contentType: 'application/json',
    accept: 'application/json',
    body: JSON.stringify({
      anthropic_version: 'bedrock-2023-05-31',
      max_tokens: 1000,
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'image',
              source: {
                type: 'base64',
                media_type: 'image/jpeg',
                data: image
              }
            },
            {
              type: 'text',
              text: 'this is an image about fashion, please describe it in detail, and give positive and fun comments'
            }
          ]
        }
      ]
    })
  })
  const response = await bedrockClient.send(commandHaiku)
  return JSON.parse(new TextDecoder().decode(response.body))
}
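In batch mode the whole answer arrives in a single response. With the Anthropic messages API the generated text sits in content[0].text of the parsed body, so a caller can read it roughly like this (a sketch using the function above):

const result = await describeImageBedrockBatch({ image })
// the messages API returns { content: [{ type: 'text', text: '...' }], ... }
const description = result.content[0].text
console.log(description)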
Invoke Bedrock Claude3 in stream mode
async function* makeIterator(image: string) {
  const command = new InvokeModelWithResponseStreamCommand({
    modelId: 'anthropic.claude-3-haiku-20240307-v1:0',
    contentType: 'application/json',
    accept: 'application/json',
    body: JSON.stringify({
      anthropic_version: 'bedrock-2023-05-31',
      max_tokens: 2048,
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'image',
              source: {
                type: 'base64',
                media_type: 'image/jpeg',
                data: image
              }
            },
            {
              type: 'text',
              text: 'this is an image about fashion, please describe it in detail, and give positive and fun comments'
            }
          ]
        }
      ]
    })
  })
  try {
    console.log('call bedrock ...')
    const response = await bedrockClient.send(command)
    if (response.body) {
      console.log(response.body)
      for await (const chunk of response.body) {
        if (chunk.chunk) {
          try {
            const json = JSON.parse(decoder.decode(chunk.chunk.bytes))
            console.log(json)
            if (json.type == 'content_block_delta') {
              yield json.delta.text
            }
          } catch (error) {
            console.log(error)
            yield ' '
          }
        }
      }
    }
  } catch (error) {
    console.log(error)
  }
}

function iteratorToStream(iterator: any) {
  return new ReadableStream({
    async pull(controller) {
      const { value, done } = await iterator.next()
      if (done) {
        controller.close()
      } else {
        controller.enqueue(value)
      }
    }
  })
}

export async function GET() {
  return new Response('hello')
}

export async function POST(request: NextRequest) {
  // parse base64 image from the data url in the request
  const res = await request.json()
  const image = res.image.split(',').pop()
  // alternatively call bedrock in batch mode:
  // const description = await describeImageBedrockBatch({ image: image });
  // return Response.json(description);
  // invoke bedrock in stream mode and pipe chunks to the client
  const iterator = makeIterator(image)
  const stream = iteratorToStream(iterator)
  return new Response(stream)
}
Here is the frontend code
analyze-image.tsx
'use client'
import Script from 'next/script'

const decoder = new TextDecoder()

const MirrorPage = () => {
  return (
    <div>
      <div className="max-w-3xl mx-auto">
        <div className="flex flex-row space-x-2 mt-5">
          <a href="/mirror">
            <button className="bg-orange-500 px-5 py-3 rounded-sm cursor-pointer">
              Claude Haiku
            </button>
          </a>
          <a href="/mirror-claude-sonet">
            <button className="bg-orange-500 px-5 py-3 rounded-sm cursor-pointer">
              Claude Sonnet
            </button>
          </a>
          <a href="/mirror-llava">
            <button className="bg-orange-500 px-5 py-3 rounded-sm cursor-pointer">
              LLaVA
            </button>
          </a>
        </div>
      </div>
      <div className="max-w-3xl mx-auto">
        <div className="min-h-screen flex flex-col justify-center items-center">
          <video id="video" width={800} height={600}></video>
          <canvas
            id="canvas"
            width={800}
            height={600}
            style={{ display: 'none' }}
          ></canvas>
          <button
            className="bg-green-500 px-10 py-3 rounded-sm cursor-pointer mt-5"
            onClick={async event => {
              event.preventDefault()
              document.getElementById('modal')!.classList.toggle('hidden')
              // capture the current video frame into the hidden canvas
              const canvas = document.getElementById('canvas')
              const video = document.getElementById('video')
              ;(canvas as any)
                .getContext('2d')
                .drawImage(
                  video,
                  0,
                  0,
                  (canvas as any).width,
                  (canvas as any).height
                )
              const dataUrl = (canvas as any).toDataURL('image/jpeg')
              document.getElementById('modal-photo')!.setAttribute('src', dataUrl)
              document.getElementById('image-description')!.innerHTML =
                'I am thinking ... '
              const response = await fetch('/api/image-haiku', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ image: dataUrl })
              })
              // parse batch response
              // const desc = await response.text();
              // document.getElementById("image-description")!.innerHTML =
              //   JSON.parse(desc).content[0].text;
              // parse stream response
              const story = document.getElementById('image-description')
              story!.innerText = ''
              const reader = response.body!.getReader()
              while (true) {
                const { done, value } = await reader.read()
                if (done) {
                  break
                }
                try {
                  const json = decoder.decode(value)
                  story!.innerText += json
                  console.log(json)
                } catch (error) {
                  console.log(error)
                  // story!.innerText += "ERROR";
                }
              }
            }}
          >
            Submit
          </button>
        </div>
        <div
          className="bg-gray-100 min-h-screen w-[100%] fixed top-0 left-0 opacity-100 hidden"
          id="modal"
        >
          <div className="flex flex-col min-h-screen justify-center items-center">
            <div className="relative">
              <img width={800} height={600} id="modal-photo"></img>
              <p
                id="image-description"
                className="absolute bottom-0 left-0 bg-gray-100 p-5 opacity-85"
              >
                Lorem ipsum dolor sit, amet consectetur adipisicing elit. Sequi,
                enim veniam! Blanditiis magni quasi id quia harum voluptatibus,
                tempora voluptates illum vel cum. Consequuntur quam ipsa, unde
                eos inventore nemo. Lorem ipsum dolor sit amet, consectetur
                adipisicing elit. Dolorum, expedita nam eos voluptatibus
                consequatur modi quis aut ducimus veritatis omnis laborum
                accusantium? Illum inventore soluta suscipit ipsam velit
                consectetur incidunt?
              </p>
            </div>
            <button
              className="bg-orange-500 px-10 py-3 rounded-sm mt-5"
              onClick={event => {
                event.preventDefault()
                document.getElementById('modal')!.classList.toggle('hidden')
              }}
            >
              Close
            </button>
          </div>
        </div>
      </div>
      <Script>
        {`
        const video = document.getElementById("video");
        const getCamera = async () => {
          let stream = null;
          try {
            stream = await navigator.mediaDevices.getUserMedia({
              video: { width: 1280, height: 720 },
            });
            video.srcObject = stream;
            video.onloadedmetadata = () => {
              video.play();
            };
          } catch (error) {
            console.log(error);
          }
        };
        const main = async () => {
          await getCamera();
        };
        main();
        `}
      </Script>
    </div>
  )
}

export default MirrorPage
Process PDF#
- Use Amazon Textract and handle async jobs
- Use langchain's PyPDFLoader and CharacterTextSplitter
- Load documents (chunks) to AOSS
First, let's use Textract to process a PDF uploaded to S3. Text detection runs as an asynchronous job, so we start the job, poll its status, and then page through the results with NextToken
import boto3
import time

# data in s3
BUCKET = ''
FILE = 'book/datasheet.pdf'

# create textract client
client = boto3.client("textract")

# start an async text detection job
response = client.start_document_text_detection(
    DocumentLocation={'S3Object': {'Bucket': BUCKET, 'Name': FILE}}
)
job_id = response['JobId']

# poll until the job finishes
response = client.get_document_text_detection(JobId=job_id)
while response['JobStatus'] == 'IN_PROGRESS':
    time.sleep(1)
    response = client.get_document_text_detection(JobId=job_id)

# collect all result pages using NextToken pagination
pages = [response]
next_token = response.get('NextToken')
while next_token:
    time.sleep(1)
    response = client.get_document_text_detection(
        JobId=job_id, NextToken=next_token
    )
    pages.append(response)
    next_token = response.get('NextToken')
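Each collected response contains a list of Blocks, and the detected text lives in blocks of type LINE. A minimal sketch of assembling the plain text from the pages gathered above:

# concatenate detected LINE blocks into plain text
text = "\n".join(
    block["Text"]
    for page in pages
    for block in page["Blocks"]
    if block["BlockType"] == "LINE"
)
print(text[:500])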
Second, let's write a Lambda function to process a PDF file uploaded to S3 and index it into AOSS. The building blocks are PyPDFLoader and CharacterTextSplitter from langchain
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("datasheet.pdf")
pages = loader.load_and_split()

# get number of pages
len(pages)

# print pages
for page in pages:
    print(page.page_content)

# text splitter
from langchain.text_splitter import CharacterTextSplitter

# define chunk length and overlap window
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

# split pages into docs, each chunk about 1000 characters
docs = text_splitter.split_documents(documents=pages)

# check a doc or chunk content
docs[0].page_content
Here is a Python Lambda function which uses PyPDFLoader and langchain to load the PDF and embed its chunks for indexing into AOSS
load-pdf-index-aoss.py
import boto3
import json
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

BUCKET = 'cdk-entest-videos'
FILE = 'book/datasheet.pdf'

# chunk size
CHUNK_SIZE = 1000

# bedrock client
bedrock_client = boto3.client("bedrock-runtime")
# s3 client
s3_client = boto3.client("s3")


def load_pdf_to_aoss(filename: str):
    """load pdf to opensearch"""
    # download s3 file
    s3_client.download_file(BUCKET, FILE, f'/tmp/{filename}')
    # read pdf file
    loader = PyPDFLoader(f'/tmp/{filename}')
    pages = loader.load_and_split()
    # chunk with fixed size
    text_splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=0)
    docs = text_splitter.split_documents(documents=pages)
    # embedding: one titan vector per chunk
    vectors = []
    for doc in docs:
        body = json.dumps({"inputText": doc.page_content})
        response = bedrock_client.invoke_model(
            body=body,
            modelId='amazon.titan-embed-text-v1',
            accept='application/json',
            contentType='application/json'
        )
        vector = json.loads(response['body'].read())['embedding']
        vectors.append(vector)
    # indexing to aoss
    return vectors
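The function above is not yet wired to an event. A minimal handler sketch, assuming the Lambda is triggered by S3 object-created notifications (the handler name and event parsing below are illustrative, not part of the original code):

def handler(event, context):
    # an S3 notification event carries bucket and key per record
    for record in event.get("Records", []):
        key = record["s3"]["object"]["key"]
        filename = key.split("/")[-1]
        vectors = load_pdf_to_aoss(filename)
        print(f"embedded {len(vectors)} chunks from {key}")
    return {"status": "ok"}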
Finally, let's write a function to embed documents (chunks of text) for indexing into AOSS
import boto3
import json
from langchain.text_splitter import CharacterTextSplitter

# chunk size
CHUNK_SIZE = 1000

# bedrock client
bedrock_client = boto3.client("bedrock-runtime")


def load_document_to_aoss(document: str):
    """embed a document for opensearch"""
    # chunk with fixed size
    text_splitter = CharacterTextSplitter(
        separator="\n\n",
        chunk_size=10,
        chunk_overlap=0,
        length_function=len,
        is_separator_regex=False
    )
    docs = text_splitter.create_documents([document])
    # embedding
    vectors = []
    for doc in docs:
        body = json.dumps({"inputText": doc.page_content})
        response = bedrock_client.invoke_model(
            body=body,
            modelId='amazon.titan-embed-text-v1',
            accept='application/json',
            contentType='application/json'
        )
        vector = json.loads(response['body'].read())['embedding']
        vectors.append(vector)
    # indexing to aoss
    return vectors
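Both functions stop at returning the vectors; the actual write to AOSS is left out. A minimal sketch of that last step using opensearch-py with SigV4 signing, assuming a recent opensearch-py and placeholder values for the collection endpoint and index name:

import boto3
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth

AOSS_URL = "xxxx.us-east-1.aoss.amazonaws.com"  # placeholder collection endpoint

credentials = boto3.Session().get_credentials()
auth = AWSV4SignerAuth(credentials, "us-east-1", "aoss")

aoss_client = OpenSearch(
    hosts=[{"host": AOSS_URL, "port": 443}],
    http_auth=auth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)

def index_vectors(docs, vectors):
    # one document per chunk: the vector plus the original text
    for doc, vector in zip(docs, vectors):
        aoss_client.index(
            index="demo-index",  # placeholder index name
            body={"vector_field": vector, "text": doc.page_content},
        )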
Basic Prompt#
Let's create multiple conversation turns, i.e. a chat history between the client and server. First we have to update the client web page to store the chat message history; the full code is shown further below.
Here is an example of sending the multi-turn history, together with a system prompt, to Claude 3 Haiku
const commandHaiku = new InvokeModelWithResponseStreamCommand({
  modelId: 'anthropic.claude-3-haiku-20240307-v1:0',
  contentType: 'application/json',
  accept: 'application/json',
  body: JSON.stringify({
    anthropic_version: 'bedrock-2023-05-31',
    max_tokens: 1024,
    system: `Please respond in Vietnamese only!`,
    messages: [
      {
        role: 'user',
        content: [{ type: 'text', text: 'How to cook chicken soup?' }]
      },
      {
        role: 'assistant',
        content: [
          {
            type: 'text',
            text: `Here is a basic recipe for cooking chicken soup: ...`
          }
        ]
      },
      {
        role: 'user',
        content: [{ type: 'text', text: prompt }]
      }
    ]
  })
})
Here is the full code for testing
bedrock-prompt.ts
import {
  BedrockRuntimeClient,
  InvokeModelWithResponseStreamCommand,
} from "@aws-sdk/client-bedrock-runtime";

const decoder = new TextDecoder();
const bedrockClient = new BedrockRuntimeClient({ region: "us-east-1" });

const chatBedrock = async ({ prompt }: { prompt: string }) => {
  let answer = "";
  // chat history: plain-string content also works with the messages API,
  // the content-block format from the previous example is equivalent
  const messages = [
    {
      role: "user",
      content: "How to cook chicken soup?",
    },
    {
      role: "assistant",
      content: `Here is a basic recipe for cooking chicken soup:

Ingredients:
- 1 whole chicken or 3-4 lbs of chicken parts (breasts, thighs, drumsticks)
- 8 cups of water or chicken broth
- 2 large carrots, peeled and sliced
- 2 celery stalks, sliced
- 1 onion, diced
- 2 cloves garlic, minced
- 1 bay leaf
- 1 tsp dried thyme
- Salt and pepper to taste
- Egg noodles or other pasta (optional)

Instructions:
1. Place the whole chicken or chicken parts in a large pot and cover with the water or broth. Bring to a boil over high heat.
2. Once boiling, reduce heat to medium-low, cover and simmer for 45-60 minutes, until the chicken is cooked through.
3. Remove the chicken from the pot and set aside. Skim any foam or fat that rises to the surface.
4. Add the carrots, celery, onion, garlic, bay leaf and thyme to the pot. Simmer for 15-20 minutes until the vegetables are tender.
5. Meanwhile, remove the meat from the chicken bones and shred or chop it into bite-size pieces.
6. Add the cooked chicken back to the pot and season with salt and pepper to taste.
7. If using, add the egg noodles or other pasta and cook for the time specified on the package.
8. Serve the chicken soup hot. You can garnish with fresh parsley or dill if desired.

The longer you simmer the soup, the more flavorful it will become. You can also add other vegetables like potatoes, peas, or corn. Adjust seasoning as needed.`,
    },
    {
      role: "user",
      content: prompt,
    },
  ];

  const commandHaiku = new InvokeModelWithResponseStreamCommand({
    modelId: "anthropic.claude-3-haiku-20240307-v1:0",
    contentType: "application/json",
    accept: "application/json",
    body: JSON.stringify({
      anthropic_version: "bedrock-2023-05-31",
      max_tokens: 1024,
      system: `Please respond in Vietnamese only!`,
      messages: messages,
    }),
  });

  const response = await bedrockClient.send(commandHaiku);
  console.log(response.body);
  if (response.body) {
    for await (const chunk of response.body) {
      if (chunk.chunk) {
        try {
          const data = JSON.parse(decoder.decode(chunk.chunk.bytes));
          if (data.type == "content_block_start") {
            console.log(data.content_block.text);
            answer += data.content_block.text;
          } else if (data.type == "content_block_delta") {
            console.log(data.delta.text);
            answer += data.delta.text;
          }
        } catch (error) {
          console.log(error);
        }
      }
    }
  }
  console.log(answer);
};

const main = async () => {
  await chatBedrock({
    prompt: "How to customize it for 3 years old girl?",
  });
};

main();
And here is the frontend code
haiku.tsx
"use client";import { SendIcon } from "@/components/icons";import { Layout } from "@/components/layout";const ChatPage = () => {// conversation turnslet messages: any[] = [];const callBedrock = async () => {// get user question from frotendconst userQuestion = (document.getElementById("user-question") as HTMLInputElement).value;// present model response to frontendconst modelAnswer = document.getElementById("model-answer");modelAnswer!.innerText = "";// push user userQuestion to messagesmessages.push({ role: "user", content: userQuestion });// keep the most recent 9 messages [user,bot,user,...]messages = messages.slice(-9);try {const response = await fetch("/api/claudehaiku", {method: "POST",headers: {"Content-Type": "application/json",},body: JSON.stringify({ messages: messages }),});// console.log(response);const reader = response.body!.getReader();const decoder = new TextDecoder();while (true) {const { done, value } = await reader.read();if (done) {break;}try {const json = decoder.decode(value);modelAnswer!.innerText += json;console.log(json);} catch (error) {console.log(error);// modelAnswer!.innerText += "ERROR";}}// push bot response to messagesmessages.push({ role: "assistant", content: modelAnswer!.innerText });} catch (error) {console.log(error);}};const children = (<div><form><div className="fixed top-0 mt-8 w-full max-w-md px-5"><inputtype="text"className="w-full p-2 border border-gray-300 rounded shadow-xl"id="user-question"placeholder="say something enter ..."></input><buttonclassName="px-5 py-1 rounded-sm absolute top-[50%] right-2 translate-y-[-50%] opacity-80"onClick={(event) => {event.preventDefault();callBedrock();}}><SendIcon className="w-5 h-5 text-green-500" /></button></div></form><pid="model-answer"className="px-5"style={{ color: "green", marginBottom: "10px" }}></p></div>);return <Layout>{children}</Layout>;};export default ChatPage;
The server code barely changes: the route now reads req.messages (the whole history) instead of a single prompt and sends it to Bedrock.
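A minimal sketch of that route, assuming the same iteratorToStream helper as before and a makeIterator adapted to accept the messages array instead of a single prompt:

// /app/api/claudehaiku/route.ts - a sketch, not the full file
export async function POST(request: NextRequest) {
  const req = await request.json()
  // req.messages is the [user, assistant, user, ...] history from the client
  const iterator = makeIterator(req.messages)
  const stream = iteratorToStream(iterator)
  return new Response(stream)
}

// inside makeIterator, the request body simply passes the history through:
// body: JSON.stringify({
//   anthropic_version: 'bedrock-2023-05-31',
//   max_tokens: 1024,
//   messages: messages
// })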
Knowledge Base Task Role#
Here is the trust policy.
{"Version": "2012-10-17","Statement": [{"Effect": "Allow","Principal": {"Service": "ecs-tasks.amazonaws.com"},"Action": "sts:AssumeRole"}]}
And here is the permission policy.
{"Version": "2012-10-17","Statement": [{"Action": ["bedrock:InvokeModel","bedrock:InvokeModelWithResponseStream","bedrock:InvokeAgent"],"Resource": ["arn:aws:bedrock:<REGION>::foundation-model/*"],"Effect": "Allow"},{"Action": ["bedrock:Retrieve", "bedrock:RetrieveAndGenerate"],"Resource": "arn:aws:bedrock:<REGION>:<ACCOUNT_ID>:knowledge-base/<KB_ID>","Effect": "Allow"},{"Action": ["s3:GetObject", "s3:PutObject"],"Resource": ["arn:aws:s3:::cdk-entest-videos/*","arn:aws:s3:::bedrock-knowlege-based-haimtran-demo"],"Effect": "Allow"},{"Action": "aoss:APIAccessAll","Resource": ["arn:aws:aoss:<REGION>:<ACCOUNT_ID>:collection/<COLLECTION_ID>"],"Effect": "Allow"}]}
Knowledge Application#
- Update aoss-index
- Update aoss-query
- Update KB retrieve
- Update KB retrieve and generate
Here is the aoss-index backend.
import { config } from '@/config'
import {
  BedrockRuntimeClient,
  InvokeModelCommand
} from '@aws-sdk/client-bedrock-runtime'
import { NextRequest } from 'next/server'
const { defaultProvider } = require('@aws-sdk/credential-provider-node')
const { Client } = require('@opensearch-project/opensearch')
const { AwsSigv4Signer } = require('@opensearch-project/opensearch/aws')

const decoder = new TextDecoder()

// bedrock client
const bedrockClient = new BedrockRuntimeClient({
  region: config.REGION_BEROCK
})

// opensearch serverless client
const aossClient = new Client({
  ...AwsSigv4Signer({
    region: config.AOSS_REGION,
    service: 'aoss',
    getCredentials: () => {
      const credentialsProvider = defaultProvider({})
      return credentialsProvider()
    }
  }),
  node: config.AOSS_URL
})

// convert text to vector using bedrock titan
const createEmbedVector = async ({ doc }: { doc: string }) => {
  const response = await bedrockClient.send(
    new InvokeModelCommand({
      modelId: 'amazon.titan-embed-text-v1',
      contentType: 'application/json',
      accept: 'application/json',
      body: JSON.stringify({
        inputText: doc
      })
    })
  )
  const vec = JSON.parse(decoder.decode(response.body))['embedding']
  return vec
}

// index a document into the opensearch collection
const indexAoss = async ({
  title,
  link,
  text
}: {
  title: string
  link: string
  text: string
}) => {
  // no chunk split
  const vec = await createEmbedVector({ doc: text })
  // serverless does not support custom document id
  const body = {
    vector_field: vec,
    title: title,
    link: link,
    text: text
  }
  var response = await aossClient.index({
    index: config.AOSS_INDEX_NAME,
    body: body
  })
  console.log(response.body)
  return response.body
}

export async function POST(request: NextRequest) {
  // parse request
  const req = await request.json()
  // index the document into opensearch
  const docs = await indexAoss({
    title: req.title,
    link: req.link,
    text: req.text
  })
  // response
  return Response.json({ docs: docs })
}
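A quick way to exercise this route from the browser console or a client component (the payload values are placeholders):

// a sketch: index one document via the route above
const res = await fetch('/api/aoss-index', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    title: 'hello bedrock',
    link: 'https://example.com/hello',
    text: 'Amazon Bedrock is a managed service for foundation models.'
  })
})
console.log(await res.json())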
Here is the aoss-query backend.
import { config } from '@/config'
import {
  BedrockRuntimeClient,
  InvokeModelCommand
} from '@aws-sdk/client-bedrock-runtime'
import { NextRequest } from 'next/server'
const { defaultProvider } = require('@aws-sdk/credential-provider-node')
const { Client } = require('@opensearch-project/opensearch')
const { AwsSigv4Signer } = require('@opensearch-project/opensearch/aws')

const decoder = new TextDecoder()

// bedrock client
const bedrockClient = new BedrockRuntimeClient({
  region: config.REGION_BEROCK
})

// opensearch serverless client
const aossClient = new Client({
  ...AwsSigv4Signer({
    region: config.AOSS_REGION,
    service: 'aoss',
    getCredentials: () => {
      const credentialsProvider = defaultProvider({})
      return credentialsProvider()
    }
  }),
  node: config.AOSS_URL
})

// convert text to vector using bedrock titan
const createEmbedVector = async ({ doc }: { doc: string }) => {
  const response = await bedrockClient.send(
    new InvokeModelCommand({
      modelId: 'amazon.titan-embed-text-v1',
      contentType: 'application/json',
      accept: 'application/json',
      body: JSON.stringify({
        inputText: doc
      })
    })
  )
  const vec = JSON.parse(decoder.decode(response.body))['embedding']
  return vec
}

// full-text query opensearch by title
const queryByTitle = async ({ query }: { query: string }) => {
  const body = {
    size: 5,
    query: {
      match: {
        title: query
      }
    }
  }
  var response = await aossClient.search({
    index: config.AOSS_INDEX_NAME,
    body: body
  })
  // return hits to client
  const docs = response.body.hits.hits
  console.log(docs)
  return docs
}

// semantic (knn) query of the opensearch collection
const queryAoss = async ({ query }: { query: string }) => {
  // convert text to vector using bedrock titan
  const vec = await createEmbedVector({ doc: query })
  // query opensearch collection top 5 vectors
  const body = {
    size: 5,
    query: {
      knn: {
        vector_field: {
          vector: vec,
          k: 5
        }
      }
    }
  }
  var response = await aossClient.search({
    index: config.AOSS_INDEX_NAME,
    body: body
  })
  // return hits to client
  const docs = response.body.hits.hits
  return docs
}

// index opensearch collection
const indexAoss = async ({ doc, title }: { doc: string; title: string }) => {
  // no chunk split
  const vec = await createEmbedVector({ doc: doc })
  // serverless does not support custom document id
  const body = {
    vector_field: vec,
    text: doc,
    title: title
  }
  var response = await aossClient.index({
    index: config.AOSS_INDEX_NAME,
    body: body
  })
}

export async function POST(request: NextRequest) {
  // parse request
  const req = await request.json()
  // query opensearch
  const docs = await queryByTitle({ query: req.query })
  // response
  return Response.json({ docs: docs })
}
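Note that the POST handler above does a full-text match on title. For semantic search, swap in queryAoss, which embeds the question with Titan and runs a kNN search instead:

export async function POST(request: NextRequest) {
  const req = await request.json()
  // vector search instead of title match
  const docs = await queryAoss({ query: req.query })
  return Response.json({ docs: docs })
}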
Here is the KB retrieve backend.
import { config } from '@/config'
import {
  BedrockAgentRuntimeClient,
  RetrieveCommand
} from '@aws-sdk/client-bedrock-agent-runtime'
import { NextRequest } from 'next/server'

// create bedrock agent runtime client
const bedrock = new BedrockAgentRuntimeClient({
  region: config.REGION_BEROCK
})

const retrieveBedrockKB = async ({ query }: { query: string }) => {
  // create command
  const command = new RetrieveCommand({
    knowledgeBaseId: config.KNOWLEDGE_BASE_ID,
    retrievalQuery: {
      text: query
    },
    retrievalConfiguration: {
      vectorSearchConfiguration: {
        numberOfResults: 10,
        overrideSearchType: 'HYBRID' // or 'SEMANTIC'
      }
    }
  })
  // send command
  const response = await bedrock.send(command)
  console.log(response.retrievalResults)
  return response.retrievalResults
}

export async function POST(request: NextRequest) {
  // parse request
  const req = await request.json()
  // retrieve matching chunks from the knowledge base
  const docs = await retrieveBedrockKB({ query: req.query })
  // response
  return Response.json({ docs: docs })
}
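RetrieveCommand only returns the matching chunks; generation is up to you. A minimal RAG sketch building a prompt from the results (each retrieval result carries its chunk text in content.text), which could then be sent to Claude with one of the invocation routes from earlier sections:

// a sketch: stuff retrieved chunks into a prompt for the model
const buildRagPrompt = async (question: string) => {
  const results = (await retrieveBedrockKB({ query: question })) ?? []
  const context = results
    .map((r: any) => r.content?.text ?? '')
    .join('\n\n')
  return `Answer the question using only the context below.\n\nContext:\n${context}\n\nQuestion: ${question}`
}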
Here is the KB retrieve-and-generate backend.
import { config } from '@/config'
import {
  BedrockAgentRuntimeClient,
  RetrieveAndGenerateCommand
} from '@aws-sdk/client-bedrock-agent-runtime'
import { NextRequest } from 'next/server'

// create bedrock agent runtime client
const bedrock = new BedrockAgentRuntimeClient({
  region: config.REGION_BEROCK
})

const retrieveGenerateBedrockKB = async ({ query }: { query: string }) => {
  // create command
  const command = new RetrieveAndGenerateCommand({
    input: {
      text: query
    },
    retrieveAndGenerateConfiguration: {
      type: 'KNOWLEDGE_BASE',
      knowledgeBaseConfiguration: {
        knowledgeBaseId: config.KNOWLEDGE_BASE_ID,
        modelArn: config.KNOWLEDGE_BASE_MODEL_ARN,
        retrievalConfiguration: {
          vectorSearchConfiguration: {
            numberOfResults: 10,
            overrideSearchType: 'HYBRID' // or 'SEMANTIC'
          }
        }
      }
    }
  })
  // send command
  const response = await bedrock.send(command)
  console.log(response.citations)
  return response
}

export async function POST(request: NextRequest) {
  // parse request
  const req = await request.json()
  // retrieve and generate from the knowledge base
  const response = await retrieveGenerateBedrockKB({ query: req.query })
  // response: the generated answer plus its citations
  return Response.json({
    output: response.output,
    citations: response.citations
  })
}
Knowledge Base Frontend#
Here is the aoss-index frontend.
'use client'

const OpenSearchPage = () => {
  return (
    <main>
      <div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
        <form>
          <div className="grid grid-cols-1 gap-2 mt-8 w-full max-w-md px-5">
            <input
              type="text"
              className="w-full p-2 border border-gray-300 rounded"
              id="title"
              placeholder="title"
            ></input>
            <input
              type="link"
              className="w-full p-2 border border-gray-300 rounded"
              id="link"
              placeholder="link"
            ></input>
            <textarea
              id="content"
              className="w-full border border-gray-300 rounded p-2"
              rows={10}
              placeholder="content ..."
            ></textarea>
            <button
              className="bg-green-500 rounded-md px-5 py-3 max-w-[150px]"
              onClick={async event => {
                event.preventDefault()
                console.log('submit')
                // get title
                let title = (document.getElementById('title') as HTMLInputElement)
                  .value
                // get link
                let link = (document.getElementById('link') as HTMLInputElement)
                  .value
                // get content
                let content = (
                  document.getElementById('content') as HTMLInputElement
                ).value
                console.log(title, link, content)
                // show the waiting modal
                document.getElementById('modal')!.style.display = 'block'
                // call the api
                try {
                  const response = await fetch('/api/aoss-index', {
                    method: 'POST',
                    headers: {
                      'Content-Type': 'application/json'
                    },
                    body: JSON.stringify({
                      title: title,
                      link: link,
                      text: content
                    })
                  })
                  const data = await response.json()
                  console.log(data)
                  ;(
                    document.getElementById('aoss-response') as HTMLInputElement
                  ).innerText = JSON.stringify(data.docs, null, 2)
                } catch (error) {
                  console.log(error)
                  ;(
                    document.getElementById('aoss-response') as HTMLInputElement
                  ).innerText = 'ERROR'
                }
              }}
            >
              Submit
            </button>
          </div>
        </form>
        <div
          id="modal"
          className="fixed top-0 left-0 bg-slate-100 min-h-screen w-full opacity-100"
          hidden
        >
          <div className="min-h-screen flex justify-center items-center">
            <div className="max-w-md w-full">
              <p>Please wait for opensearch indexing ...</p>
              <div
                id="aoss-response"
                className="w-full bg-gray-100 border border-gray-300 rounded p-2 min-h-[300px]"
              ></div>
              <button
                className="bg-orange-500 px-10 py-3 max-w-[150px] rounded-md mt-2"
                onClick={() => {
                  document.getElementById('modal')!.style.display = 'none'
                }}
              >
                Close
              </button>
            </div>
          </div>
        </div>
      </div>
    </main>
  )
}

export default OpenSearchPage
Here is the retrieve-and-generate frontend.
'use client'
import { SendIcon } from '@/components/icons'
import { useState } from 'react'

const KBRetrieveGeneratePage = () => {
  const [answer, setAnswer] = useState<null | string>(null)
  const [docs, setDocs] = useState<any[]>([])
  return (
    <div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
      <form>
        <div className="fixed top-0 mt-8 w-full max-w-md px-5">
          <input
            type="text"
            className="w-full p-2 border border-gray-300 rounded shadow-xl"
            id="user-question"
            placeholder="say something enter ..."
          ></input>
          <button
            className="px-5 py-1 rounded-sm absolute top-[50%] right-2 translate-y-[-50%] opacity-80"
            onClick={async event => {
              event.preventDefault()
              // get the user question
              const query = (
                document.getElementById('user-question') as HTMLInputElement
              ).value
              // call the retrieve-and-generate api
              try {
                const response = await fetch('/api/retrieve-generate', {
                  method: 'POST',
                  headers: {
                    'Content-Type': 'application/json'
                  },
                  body: JSON.stringify({ query: query })
                })
                const data = await response.json()
                console.log(data)
                setDocs(data.citations)
                setAnswer(data.output.text)
              } catch (error) {
                console.log(error)
              }
            }}
          >
            <SendIcon className="w-5 h-5 text-green-500" />
          </button>
        </div>
      </form>
      <div>
        <div>
          {answer && (
            <>
              <h3 className="font-bold">Answer</h3>
              <p>{answer}</p>
              <hr className="bg-blue-500 border-1 border-blue-500"></hr>
            </>
          )}
        </div>
        {docs.map((doc, idx) => (
          <div key={idx} className="mb-3">
            <h3 className="font-bold">Generated Part: {idx}</h3>
            <p>{doc.generatedResponsePart.textResponsePart.text}</p>
            {(doc.retrievedReferences as any[]).map((ref, idy) => (
              <div key={idy}>
                <h3 className="font-bold text-blue-600">Citation: {idy}</h3>
                <a href="#" className="underline text-blue-600">
                  {ref.metadata['x-amz-bedrock-kb-source-uri']}
                </a>
                <p className="text-blue-600">{ref.content.text}</p>
              </div>
            ))}
            <hr className="bg-blue-500 border-1 border-blue-500"></hr>
          </div>
        ))}
      </div>
    </div>
  )
}

export default KBRetrieveGeneratePage