How to use exllama, plus Qdrant indexing & search
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
from sentence_transformers import SentenceTransformer

# Initialize the client
client = QdrantClient("localhost:6333")

COLLECTION_NAME = "docs"

# paraphrase-multilingual-mpnet-base-v2 produces 768-dimensional embeddings,
# so the collection must be created with size=768 (not 384).
client.recreate_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),
)

model = SentenceTransformer("paraphrase-multilingual-mpnet-base-v2", device="cuda")

sentences = ['This framework generates embeddings for each input sentence',
             'Sentences are passed as a list of string.',
             'The quick brown fox jumps over the lazy dog.']

sentence_embeddings = model.encode(sentences)

# Upsert one point per sentence; storing the sentence text in the payload
# lets search results be mapped back to the original text.
client.upsert(
    collection_name=COLLECTION_NAME,
    points=[
        PointStruct(
            id=idx,
            vector=vector.tolist(),
            payload={"text": sentences[idx], "color": "red", "rand_number": idx % 10}
        )
        for idx, vector in enumerate(sentence_embeddings)
    ]
)
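The title promises search as well as indexing, so here is a minimal sketch of querying the collection just built, assuming the same `client` and `model` objects are still in scope; the query string and `limit` are illustrative.

# Embed a query with the same model and retrieve the nearest sentences.
query = "A fast animal leaps over a sleeping one."  # illustrative query
query_vector = model.encode(query).tolist()

hits = client.search(
    collection_name=COLLECTION_NAME,
    query_vector=query_vector,
    limit=3,
)

for hit in hits:
    # hit.score is the cosine similarity; hit.payload holds the stored fields
    print(hit.score, hit.payload["text"])

Because the collection was created with Distance.COSINE, higher scores mean closer matches; the fox sentence should rank first for this query.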
from exllama.model import ExLlama, ExLlamaCache, ExLlamaConfig
from exllama.tokenizer import ExLlamaTokenizer
from exllama.generator import ExLlamaGenerator
import torch
import datetime

def log(message):
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] {message}")

# Inference only: disable autograd and warm up CUDA.
torch.set_grad_enabled(False)
torch.cuda._lazy_init()

config_path = "./models/Scarlett-13B-GPTQ"
config = ExLlamaConfig(f"{config_path}/config.json")
config.model_path = f"{config_path}/model.safetensors"

log(">>> loading model...")
model = ExLlama(config)
log(">>> allocating cache...")
cache = ExLlamaCache(model)
log(">>> loading tokenizer...")
tokenizer = ExLlamaTokenizer(f"{config_path}/tokenizer.model")
log(">>> creating generator...")
generator = ExLlamaGenerator(model, tokenizer, cache)

# Optional sampling settings:
# generator.disallow_tokens([tokenizer.eos_token_id])
# generator.settings.token_repetition_penalty_max = 1.2
# generator.settings.temperature = 0.95
# generator.settings.top_p = 0.65
# generator.settings.top_k = 100
# generator.settings.typical = 0.5

# Produce a simple generation
log(">>> simple autocomplete...")
prompt = "Once upon a time,"
log(prompt)
output = generator.generate_simple(prompt, max_new_tokens=200)
log(output[len(prompt):])
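The gist keeps indexing and generation in separate files, but one natural way to connect them (not shown in the original) is a retrieve-then-generate loop: look up context in Qdrant and prepend it to the exllama prompt. A sketch under those assumptions, reusing `generator` and `log` from above and the "docs" collection from the first file; the question string is illustrative.

# Hypothetical tie-in: retrieve the closest indexed sentence and use it as context.
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("paraphrase-multilingual-mpnet-base-v2", device="cuda")
qdrant = QdrantClient("localhost:6333")

question = "What does the framework produce for each sentence?"  # illustrative
hits = qdrant.search(
    collection_name="docs",
    query_vector=embedder.encode(question).tolist(),
    limit=1,
)
context = hits[0].payload["text"] if hits else ""

# Prepend the retrieved sentence as context for the language model.
rag_prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"
log(rag_prompt)
answer = generator.generate_simple(rag_prompt, max_new_tokens=100)
log(answer[len(rag_prompt):])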