Documentation
¶
Overview ¶
Example ¶
package main
import (
"context"
"fmt"
"os"
"github.com/coseyo/gptbot"
)
func main() {
ctx := context.Background()
apiKey := os.Getenv("OPENAI_API_KEY")
encoder := gptbot.NewOpenAIEncoder(apiKey, "")
store := gptbot.NewLocalVectorStore()
// Feed documents into the vector store.
feeder := gptbot.NewFeeder(&gptbot.FeederConfig{
Encoder: encoder,
Updater: store,
})
err := feeder.Feed(ctx, &gptbot.Document{
ID: "1",
Text: "Generative Pre-trained Transformer 3 (GPT-3) is an autoregressive language model released in 2020 that uses deep learning to produce human-like text. Given an initial text as prompt, it will produce text that continues the prompt.\n\nThe architecture is a decoder-only transformer network with a 2048-token-long context and then-unprecedented size of 175 billion parameters, requiring 800GB to store. The model was trained using generative pre-training; it is trained to predict what the next token is based on previous tokens. The model demonstrated strong zero-shot and few-shot learning on many tasks.[2]",
})
if err != nil {
fmt.Printf("err: %v", err)
return
}
// Chat with the bot to get answers.
bot := gptbot.NewBot(&gptbot.BotConfig{
APIKey: apiKey,
Encoder: encoder,
Querier: store,
})
question := "When was GPT-3 released?"
answer, err := bot.Chat(ctx, question)
if err != nil {
fmt.Printf("err: %v", err)
return
}
fmt.Printf("Q: %s\n", question)
fmt.Printf("A: %s\n", answer)
question = "How many parameters does GPT-3 use?"
answer, err = bot.Chat(ctx, question)
if err != nil {
fmt.Printf("err: %v", err)
return
}
fmt.Printf("Q: %s\n", question)
fmt.Printf("A: %s\n", answer)
}
Output: Q: When was GPT-3 released? A: GPT-3 was released in 2020. Q: How many parameters does GPT-3 use? A: GPT-3 uses 175 billion parameters.
Example (MultiTurn) ¶
package main
import (
"context"
"fmt"
"os"
"github.com/coseyo/gptbot"
)
func main() {
ctx := context.Background()
apiKey := os.Getenv("OPENAI_API_KEY")
encoder := gptbot.NewOpenAIEncoder(apiKey, "")
store := gptbot.NewLocalVectorStore()
// Feed documents into the vector store.
feeder := gptbot.NewFeeder(&gptbot.FeederConfig{
Encoder: encoder,
Updater: store,
})
err := feeder.Feed(ctx, &gptbot.Document{
ID: "1",
Text: "Generative Pre-trained Transformer 3 (GPT-3) is an autoregressive language model released in 2020 that uses deep learning to produce human-like text. Given an initial text as prompt, it will produce text that continues the prompt.\n\nThe architecture is a decoder-only transformer network with a 2048-token-long context and then-unprecedented size of 175 billion parameters, requiring 800GB to store. The model was trained using generative pre-training; it is trained to predict what the next token is based on previous tokens. The model demonstrated strong zero-shot and few-shot learning on many tasks.[2]",
})
if err != nil {
fmt.Printf("err: %v", err)
return
}
// Chat with the bot to get answers.
bot := gptbot.NewBot(&gptbot.BotConfig{
APIKey: apiKey,
Encoder: encoder,
Querier: store,
})
var history []*gptbot.Turn
question := "When was GPT-3 released?"
answer, err := bot.Chat(ctx, question, history...)
if err != nil {
fmt.Printf("err: %v", err)
return
}
fmt.Printf("Q: %s\n", question)
fmt.Printf("A: %s\n", answer)
// Save the conversation history.
history = append(history, &gptbot.Turn{
Question: question,
Answer: answer,
})
question = "How many parameters does it use?" // In multi-turn mode, here "it" will be inferred to "GPT-3".
answer, err = bot.Chat(ctx, question, history...)
if err != nil {
fmt.Printf("err: %v", err)
return
}
fmt.Printf("Q: %s\n", question)
fmt.Printf("A: %s\n", answer)
}
Output: Q: When was GPT-3 released? A: GPT-3 was released in 2020. Q: How many parameters does it use? A: GPT-3 uses 175 billion parameters.
Index ¶
- Constants
- type Bot
- type BotConfig
- type Chunk
- type Document
- type Embedding
- type Encoder
- type Feeder
- type FeederConfig
- type LocalVectorStore
- func (vs *LocalVectorStore) Delete(ctx context.Context, documentIDs ...string) error
- func (vs *LocalVectorStore) GetAllData(ctx context.Context) map[string][]*Chunk
- func (vs *LocalVectorStore) Insert(ctx context.Context, chunks map[string][]*Chunk) error
- func (vs *LocalVectorStore) LoadJSON(ctx context.Context, filename string) error
- func (vs *LocalVectorStore) Query(ctx context.Context, embedding Embedding, topK int) ([]*Similarity, error)
- func (vs *LocalVectorStore) StoreJSON(filename string) error
- type Metadata
- type ModelType
- type OpenAIEncoder
- type Preprocessor
- type PreprocessorConfig
- type PromptData
- type PromptTemplate
- type Querier
- type Similarity
- type Turn
- type Updater
- type XPreprocessor
Examples ¶
Constants ¶
View Source
const ( DefaultPromptTmpl = `` /* 225-byte string literal not displayed */ DefaultMultiTurnPromptTmpl = `` /* 786-byte string literal not displayed */ )
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BotConfig ¶
// BotConfig is the configuration used by NewBot to construct a question
// answering Bot. APIKey, Encoder and Querier are required; all other
// fields have sensible defaults.
type BotConfig struct {
// APIKey is the OpenAI's APIKey.
// This field is required.
APIKey string
// Encoder is an Embedding Encoder, which will encode the user's question into a vector for similarity search.
// This field is required.
Encoder Encoder
// Querier is a search engine, which is capable of doing the similarity search.
// This field is required.
Querier Querier
// Model is the ID of OpenAI's model to use for chat.
// Defaults to "gpt-3.5-turbo".
Model ModelType
// TopK specifies how many candidate similarities will be selected to construct the prompt.
// Defaults to 3.
TopK int
// MaxTokens is the maximum number of tokens to generate in the chat.
// Defaults to 256.
MaxTokens int
// PromptTmpl specifies a custom prompt template for single-turn mode.
// Defaults to DefaultPromptTmpl.
PromptTmpl string
// MultiTurnPromptTmpl specifies a custom prompt template for multi-turn mode.
// Defaults to DefaultMultiTurnPromptTmpl.
//
// Prompt-based question answering bot essentially operates in single-turn mode,
// since the quality of each answer largely depends on the associated prompt context
// (i.e. the most similar document chunks), which all depends on the corresponding
// question rather than the conversation history.
//
// As a workaround, we try to achieve the effect of multi-turn mode by adding an
// extra frontend agent, who can respond directly to the user for casual greetings,
// and can refine incomplete questions according to the conversation history
// before consulting the backend system (i.e. the single-turn Question Answering Bot).
MultiTurnPromptTmpl string
}
type Feeder ¶
// Feeder ingests documents into a vector store: per FeederConfig, it
// encodes document chunks with the configured Encoder and upserts them
// through the configured Updater (see Feeder.Feed in the examples).
type Feeder struct {
// contains filtered or unexported fields
}
func NewFeeder ¶
func NewFeeder(cfg *FeederConfig) *Feeder
type FeederConfig ¶
// FeederConfig is the configuration used by NewFeeder to construct a Feeder.
// Encoder and Updater are required; the other fields have defaults.
type FeederConfig struct {
// Encoder is the embedding encoder.
// This field is required.
Encoder Encoder
// Updater is the vector store for inserting/deleting chunks.
// This field is required.
Updater Updater
// Preprocessor splits the fed documents into chunks before encoding.
// Defaults to NewPreprocessor(...).
Preprocessor XPreprocessor
// BatchSize is the number of chunks to encode/upsert at a time.
// Defaults to 100.
BatchSize int
}
type LocalVectorStore ¶
// LocalVectorStore is a vector store that supports inserting and deleting
// chunks, similarity queries (Query), and persistence to/from disk as JSON
// (StoreJSON/LoadJSON). See NewLocalVectorStore.
type LocalVectorStore struct {
// contains filtered or unexported fields
}
func NewLocalVectorStore ¶
func NewLocalVectorStore() *LocalVectorStore
func (*LocalVectorStore) Delete ¶
func (vs *LocalVectorStore) Delete(ctx context.Context, documentIDs ...string) error
Delete deletes the chunks belonging to the given documentIDs. As a special case, empty documentIDs means deleting all chunks.
func (*LocalVectorStore) GetAllData ¶
func (vs *LocalVectorStore) GetAllData(ctx context.Context) map[string][]*Chunk
GetAllData returns all the internal data. It is mainly used for testing purposes.
func (*LocalVectorStore) LoadJSON ¶
func (vs *LocalVectorStore) LoadJSON(ctx context.Context, filename string) error
LoadJSON will deserialize from disk into a `LocalVectorStore` based on the provided filename.
func (*LocalVectorStore) Query ¶
func (vs *LocalVectorStore) Query(ctx context.Context, embedding Embedding, topK int) ([]*Similarity, error)
func (*LocalVectorStore) StoreJSON ¶
func (vs *LocalVectorStore) StoreJSON(filename string) error
StoreJSON will serialize the `LocalVectorStore` to disk based on the provided filename.
type ModelType ¶
// ModelType identifies which OpenAI model is used for chat
// (see BotConfig.Model).
type ModelType string

// The supported OpenAI model IDs.
const (
	// GPT-4
	GPT4 ModelType = "gpt-4"

	// GPT-3.5
	GPT3Dot5Turbo  ModelType = "gpt-3.5-turbo"
	TextDavinci003 ModelType = "text-davinci-003"
	TextDavinci002 ModelType = "text-davinci-002"

	// GPT-3
	TextAda001     ModelType = "text-ada-001"
	TextCurie001   ModelType = "text-curie-001"
	TextBabbage001 ModelType = "text-babbage-001"
)
type OpenAIEncoder ¶
// OpenAIEncoder is an Encoder that produces embeddings via OpenAI's API.
// Construct it with NewOpenAIEncoder, which takes an API key and a model
// name (the examples pass "" for the model, presumably selecting a default
// embedding model — confirm against NewOpenAIEncoder's implementation).
type OpenAIEncoder struct {
// contains filtered or unexported fields
}
func NewOpenAIEncoder ¶
func NewOpenAIEncoder(apiKey string, model string) *OpenAIEncoder
func (*OpenAIEncoder) EncodeBatch ¶
type Preprocessor ¶
// Preprocessor splits a list of documents into chunks.
// See NewPreprocessor and PreprocessorConfig for the chunking parameters.
type Preprocessor struct {
// contains filtered or unexported fields
}
Preprocessor splits a list of documents into chunks.
func NewPreprocessor ¶
func NewPreprocessor(cfg *PreprocessorConfig) *Preprocessor
func (*Preprocessor) Preprocess ¶
func (p *Preprocessor) Preprocess(docs ...*Document) (map[string][]*Chunk, error)
type PreprocessorConfig ¶
// PreprocessorConfig is the configuration used by NewPreprocessor to
// construct a Preprocessor. All fields are optional and default as noted.
type PreprocessorConfig struct {
// ChunkTokenNum is the number of tokens for each text chunk.
// Defaults to 200.
ChunkTokenNum int
// MinChunkCharNum is the minimum number of characters for each text chunk.
// Defaults to 350.
MinChunkCharNum int
// MinChunkLenToEmbed is the minimum length in characters.
// Chunks with shorter length will be discarded.
//
// Defaults to 5.
MinChunkLenToEmbed int
// MaxChunkNum is the maximum number of chunks to generate from a text.
// Defaults to 10000.
MaxChunkNum int
}
type PromptData ¶
type PromptTemplate ¶
type PromptTemplate string
type Similarity ¶
type Turn ¶
// Turn is one round of dialogue: a user question paired with the bot's
// answer. Both fields are omitted from the JSON encoding when empty.
type Turn struct {
Question string `json:"question,omitempty"`
Answer string `json:"answer,omitempty"`
}
Turn represents a round of dialogue.
Source Files
¶
Click to show internal directories.
Click to hide internal directories.
