- Implement streaming lc3 without using files
- Use pydantic for config management

Reviewed-on: https://gitea.pstruebi.xyz/auracaster/multilang-translator-local/pulls/2
import logging as log
import time

import ollama
import requests

from multilang_translator.translator import syspromts

# ollama.create(  # TODO: create models on startup
#     model='example',
#     from_='llama3.2', system="You are Mario from Super Mario Bros."
# )

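
# Sketch for the TODO above: derive per-language translator models at startup.
# Hypothetical helper, not part of the original module; the model names are
# placeholders, and it assumes syspromts exposes TRANSLATOR_DEU_EN /
# TRANSLATOR_DEU_FR (the attributes looked up in translate_de_to_x below).
def create_translator_models(base_model='llama3.2'):
    for lang in ('EN', 'FR'):
        ollama.create(
            model=f'translator-deu-{lang.lower()}',  # placeholder name
            from_=base_model,
            system=getattr(syspromts, f'TRANSLATOR_DEU_{lang}'),
        )
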
async def chat():
    """One-off example query against the default Ollama host."""
    message = {'role': 'user', 'content': 'Why is the sky blue?'}
    response = await ollama.AsyncClient().chat(model='llama3.2', messages=[message])
    return response.message.content


def query_openwebui(model, system, query, url, token):
    """Send a single chat completion request to an Open WebUI instance."""
    url = f'{url}/api/chat/completions'
    headers = {
        'Authorization': f'Bearer {token}',
    }
    payload = {
        'model': model,
        'messages': [
            {'role': 'system', 'content': system},
            {'role': 'user', 'content': query},
        ],
    }
    start = time.time()
    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()  # fail loudly instead of a KeyError below
    log.info("Translating the text took %s s", round(time.time() - start, 2))
    return response.json()['choices'][0]['message']['content']


def query_ollama(model, system, query, host='http://localhost:11434'):
    """Send a single chat request to an Ollama server."""
    client = ollama.Client(
        host=host,
    )

    response = client.chat(
        model=model,
        messages=[
            {'role': 'system', 'content': system},
            {'role': 'user', 'content': query},
        ],
    )
    return response.message.content


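# Hedged sketch of a streaming variant (not in the original module): the
# ollama client accepts stream=True and then yields partial responses, which
# avoids buffering the whole translation before it can be used.
def query_ollama_stream(model, system, query, host='http://localhost:11434'):
    client = ollama.Client(host=host)
    for chunk in client.chat(
        model=model,
        messages=[
            {'role': 'system', 'content': system},
            {'role': 'user', 'content': query},
        ],
        stream=True,
    ):
        yield chunk.message.content

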
def translate_de_to_x(  # TODO: use async ollama client later - implement a translate async function
    text: str,
    target_language: str,
    client='ollama',
    model='llama3.2:3b-instruct-q4_0',  # remember to use instruct models
    host=None,
    token=None,
):
    """Translate German text into target_language via the selected LLM client."""
    start = time.time()
    system_prompt = getattr(syspromts, f"TRANSLATOR_DEU_{target_language.upper()}")

    if client == 'ollama':
        response = query_ollama(model, system_prompt, text, host=host)
    elif client == 'openwebui':
        response = query_openwebui(model, system_prompt, text, url=host, token=token)
    else:
        raise NotImplementedError('llm client not implemented')

    log.info('Running the translator to %s took %s s', target_language, round(time.time() - start, 3))
    return response


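# Sketch for the TODO above (illustrative, not part of the original module):
# an async variant for the 'ollama' client path, using ollama.AsyncClient.
async def translate_de_to_x_async(
    text: str,
    target_language: str,
    model='llama3.2:3b-instruct-q4_0',
    host=None,
):
    system_prompt = getattr(syspromts, f"TRANSLATOR_DEU_{target_language.upper()}")
    response = await ollama.AsyncClient(host=host).chat(
        model=model,
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': text},
        ],
    )
    return response.message.content

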
if __name__ == "__main__":
    from multilang_translator.translator import test_content

    start = time.time()
    response = translate_de_to_x('Der Zug ist da.', target_language='en', model='llama3.2:1b-instruct-q4_0')
    print("Query took", time.time() - start)
    print(response)

    start = time.time()
    response = translate_de_to_x(test_content.TESTSENTENCE_DE_RAINBOW, target_language='en')
    print("Query took", time.time() - start)
    print(response)

    start = time.time()
    response = translate_de_to_x(test_content.TESTSENTENCE_DE_RAINBOW, target_language='fr')
    print("Query took", time.time() - start)
    print(response)