initial commit
This commit is contained in:
74
.gitignore
vendored
Normal file
74
.gitignore
vendored
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a PyInstaller build script
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# VSCode
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
# Local development settings
|
||||||
|
.env.local
|
||||||
|
.env.development.local
|
||||||
|
|
||||||
|
# Log files
|
||||||
|
*.log
|
||||||
167
ollama-speedtest.py
Normal file
167
ollama-speedtest.py
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
import time
|
||||||
|
import ollama
|
||||||
|
import base64
|
||||||
|
import tabulate
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.table import Table
|
||||||
|
import datetime
|
||||||
|
import asyncio
|
||||||
|
import concurrent.futures
|
||||||
|
|
||||||
|
async def generate_tokens_async(model_name, host_ip):
    """Time one non-streaming generation of *model_name* on the server at *host_ip*.

    The blocking ``generate`` call runs in a worker thread so that other
    hosts can be benchmarked concurrently on the same event loop.

    Args:
        model_name: Name of the model to run (passed as ``model=``).
        host_ip: Host name or IP of the server; port 11434 is assumed.

    Returns:
        On success, a dict with ``"success": True`` plus
        ``"tokens_per_second"``, ``"generation_time"`` (seconds, wall clock),
        ``"content_length"`` (characters) and ``"estimated_tokens"``
        (characters / 4). On any failure, ``{"success": False, "error": str}``;
        the error is also printed. This function does not raise for request
        errors.
    """
    # Define the prompt to generate tokens
    prompt = """Generate 1000 random words with no spaces, each word should be between 3-5 letters long. Separate them with line breaks.\n\n"""

    try:
        # Bind a client to this specific host. Assigning ``ollama.host`` only
        # sets a module attribute; per the ollama-python docs a non-default
        # server needs an explicit ``Client(host=...)``. A per-call client also
        # avoids sharing one mutable global between concurrently tested hosts.
        client = ollama.Client(host=f"http://{host_ip}:11434")

        loop = asyncio.get_running_loop()

        # One request per call, so a single worker thread is sufficient.
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            # perf_counter is monotonic, so the interval cannot be skewed by
            # system clock adjustments during a long generation.
            start_time = time.perf_counter()
            response = await loop.run_in_executor(
                pool,
                lambda: client.generate(model=model_name, prompt=prompt, stream=False),
            )
            generation_time = time.perf_counter() - start_time

        # Get the generated content
        generated_content = response['response']

        # Rough estimate: 1 token is approximately 4 characters for English text
        estimated_tokens = len(generated_content) / 4

        # Guard against a zero interval so the report never dies on a
        # ZeroDivisionError.
        if generation_time > 0:
            tokens_per_second = estimated_tokens / generation_time
        else:
            tokens_per_second = 0.0

        return {
            "success": True,
            "tokens_per_second": tokens_per_second,
            "generation_time": generation_time,
            "content_length": len(generated_content),
            "estimated_tokens": estimated_tokens,
        }

    except Exception as e:
        # Best-effort benchmark: report the failure for this host/model pair
        # and let the remaining tests continue.
        print(f"Error for {model_name} on {host_ip}: {str(e)}")
        return {
            "success": False,
            "error": str(e),
        }
|
||||||
|
|
||||||
|
async def test_host(host, models):
    """Benchmark each entry of *models* against *host*, one at a time.

    Returns a list with one dict per model, in input order, holding the
    keys ``"host"``, ``"model"`` and ``"result"`` (whatever
    ``generate_tokens_async`` returned for that pair).
    """
    collected = []

    for model_name in models:
        print(f"Testing model: {model_name} on host: {host}")

        outcome = await generate_tokens_async(model_name, host)
        collected.append(dict(host=host, model=model_name, result=outcome))

        # Brief pause before the next request to the same host
        await asyncio.sleep(1)

    return collected
|
||||||
|
|
||||||
|
def print_report(results):
    """Render the benchmark results as console tables.

    *results* is an iterable-and-reusable sequence of dicts with the keys
    ``"host"``, ``"model"`` and ``"result"``; each ``"result"`` carries a
    ``"success"`` flag plus either the timing fields or an ``"error"`` text.
    A per-test table is always printed; a summary table follows only when at
    least one test succeeded.
    """
    console = Console()

    # Main table, titled with the time the report was produced
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    table = Table(title=f"Ollama Performance Test Report - {timestamp}")

    column_styles = (
        ("Host IP", "cyan"),
        ("Model", "green"),
        ("Tokens/Second", "magenta"),
        ("Generation Time (s)", "yellow"),
        ("Content Length", "blue"),
        ("Status", "red"),
    )
    for heading, colour in column_styles:
        table.add_column(heading, style=colour)

    # One row per host/model pair
    for entry in results:
        host = entry["host"]
        model = entry["model"]
        outcome = entry["result"]

        if not outcome["success"]:
            cells = ("N/A", "N/A", "N/A", f"❌ Failed: {outcome['error']}")
        else:
            cells = (
                f"{outcome['tokens_per_second']:.2f}",
                f"{outcome['generation_time']:.2f}",
                str(outcome['content_length']),
                "✅ Success",
            )

        table.add_row(host, model, *cells)

    console.print(table)

    # Summary statistics only make sense when something succeeded
    succeeded = [entry for entry in results if entry["result"]["success"]]
    if not succeeded:
        return

    def speed(entry):
        return entry["result"]["tokens_per_second"]

    summary_table = Table(title="Summary Statistics")
    summary_table.add_column("Metric", style="cyan")
    summary_table.add_column("Value", style="green")

    mean_speed = sum(speed(entry) for entry in succeeded) / len(succeeded)
    fastest = max(succeeded, key=speed)
    slowest = min(succeeded, key=speed)

    summary_table.add_row("Average Tokens/Second", f"{mean_speed:.2f}")
    summary_table.add_row(
        "Fastest Configuration",
        f"{fastest['host']} with {fastest['model']} ({speed(fastest):.2f} tokens/s)",
    )
    summary_table.add_row(
        "Slowest Configuration",
        f"{slowest['host']} with {slowest['model']} ({speed(slowest):.2f} tokens/s)",
    )

    console.print(summary_table)
|
||||||
|
|
||||||
|
async def main_async():
    """Run the benchmark matrix and print the combined report.

    Each host gets its own task, so hosts are exercised concurrently while
    the models of a single host are handled in order by ``test_host``.
    """
    # Host -> models to benchmark on that host
    test_matrix = {
        "localhost": ["llama3.2:3b-instruct-q4_0"],
        "192.168.50.3": ["llama3.2:3b-instruct-q4_0", "llama3.1:8b-instruct-q4_0", "llama3.1:8b-instruct-q8_0"],
        "192.168.50.121": ["llama3.2:3b-instruct-q4_0", "llama3.1:8b-instruct-q4_0", "llama3.1:8b-instruct-q8_0"],
    }

    # Schedule one task per host, then wait for all of them
    pending = [
        asyncio.create_task(test_host(host, models))
        for host, models in test_matrix.items()
    ]
    per_host = await asyncio.gather(*pending)

    # Merge the per-host lists into a single list, preserving host order
    combined = [row for rows in per_host for row in rows]

    print_report(combined)
|
||||||
|
|
||||||
|
def main():
    """Synchronous entry point: drive ``main_async`` to completion."""
    benchmark = main_async()
    asyncio.run(benchmark)


# Run the benchmark only when executed as a script, not when imported
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user