speedHelper.py

Documentation: edge-infra.dev/hack/datasync

import argparse
import base64
import csv
import json
import subprocess
import time
from datetime import datetime, timezone
     8
     9# Parse command-line arguments for data logging parameters
    10parser = argparse.ArgumentParser(description='Parameters for data logging')
    11parser.add_argument('-mj', '--maxjobs', type=int, required=True, help="Max Jobs parameter value")
    12parser.add_argument('-c', '--churn', type=int, required=True, help="Churn parameter value")
    13parser.add_argument('-i', '--interval', type=int, required=True, help="Interval parameter value")
    14parser.add_argument('-u', '--username', type=str, required=True, help="Username for authentication")
    15parser.add_argument('-p', '--password', type=str, required=True, help="Password for authentication")
    16parser.add_argument('-db', '--filterByDatabase', type=str, required=False, help="Filter by specific database ID (optional)")
    17parser.add_argument('-s', '--serverURL', type=str, default="http://localhost:5984/", help="Server URL (optional, default is http://localhost:5984/)")
    18args = parser.parse_args()
    19
    20# Encode the username and password into base64
    21authentication_token = base64.b64encode(f"{args.username}:{args.password}".encode()).decode()
    22
    23# Send a curl request to get scheduler data from CouchDB
    24cmd = ["curl", "--location", f'{args.serverURL}_scheduler/docs', 
    25        "--header", f'Authorization: Basic {authentication_token}']
    26
    27while True:
    28    result = subprocess.run(cmd, stdout=subprocess.PIPE)
    29    data = json.loads(result.stdout)
    30    
    31    with open('output.csv', 'a', newline='') as f:
    32        writer = csv.writer(f)
    33        for doc in data['docs']:
    34            doc_id = doc['doc_id']
    35            if args.filterByDatabase:
    36                if doc_id != args.filterByDatabase:
    37                    continue
    38            else:
    39                print("doc_id is", doc_id)
    40
    41            docs_written = doc['info']['docs_written']
    42            start_time = datetime.strptime(doc['start_time'], "%Y-%m-%dT%H:%M:%SZ")
    43            time_elapsed = -(datetime.now() - start_time).total_seconds()
    44            speed = docs_written/time_elapsed
    45
    46            # Log the speed of document processing
    47            print("Speed is", speed, "documents per second")
    48            writer.writerow([datetime.now(), doc_id, docs_written, speed, args.maxjobs, args.churn, args.interval])
    49
    50    # Adjust time between iterations here
    51    time.sleep(15)
View as plain text