Click here to Skip to main content
15,919,931 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
See more:
I have two storage accounts. I copy files into the source account, and from there the files are replicated to the destination account. I want to get the max, min, and average replication latency of the files.

Example:
Suppose there are 3 files in the source and they are replicated to the destination.
For each file, I want the date difference (datediff) between its LastModified time in the source and in the destination, and then the max, min, and average of those latencies.

The query I am using works for a very small amount of data and a short time range. When I try to run it for the last 3 days, it fails with an error.

Note: in each copy iteration, almost 5 lakh (about 500,000) files are copied to the source.

What I have tried:

// URL prefixes of the source and destination containers. They are stripped
// from every logged Uri so that both copies of a blob end up with the same
// relative name. Note that trim_start() interprets its first argument as a
// regular expression, not as a literal string.
let Srcsubstring = "https://xxxxxxxxx.blob.core.windows.net:443/test/";
let Destsubstring = "https://xxxxxxxxx.blob.core.windows.net:443/test/";
let substring2 = "?";   // marks the start of the URL query string
let substring3 = "_";   // separates the prefix from the rest of the blob name
// Size of the time window to analyse. Filtering on TimeGenerated first keeps
// the amount of data scanned (and cached by materialize) bounded.
let lookback = 3d;

// Step 1: Filter logs within the lookback period, preprocess URIs, and extract prefixes
// The result is materialized because both Step 2 and Step 3 read it.
let filteredLogs = materialize(
    StorageBlobLogs
    // Restrict the time range before anything else
    | where TimeGenerated > ago(lookback)
    | where AccountName in ("xxxxxxxxxx", "xxxxxxxxxx")
    | where OperationName in ("CopyBlob", "PutBlob")
    // Remove source and destination substrings
    | extend Uri = trim_start(Srcsubstring, Uri)
    | extend Uri = trim_start(Destsubstring, Uri)
    // Truncate at question mark. split() returns the whole value as element 0
    // when there is no "?", so URIs without a query string are kept intact
    // instead of depending on substring() being called with a length of -1.
    | extend Uri = tostring(split(Uri, substring2)[0])
    // Extract prefix (everything before the first "_").
    // NOTE(review): when a name contains no "_", indexof() returns -1 here;
    // confirm which prefix such blobs should be grouped under.
    | extend prefix = substring(Uri, 0, indexof(Uri, substring3))
    // Keep only the columns the later steps use
    | project Uri, LastModifiedTime, AccountName, OperationName, RequestBodySize, TimeGenerated, prefix
);

// Step 2: Calculate replication latency
// For every blob (prefix + Uri) take the latest LastModifiedTime logged by
// PutBlob (treated as the source side) and by CopyBlob (treated as the
// destination side), compute the difference in whole seconds, and then reduce
// those per-blob latencies to max / min / avg per prefix.
// The result is consumed only once (by the final join), so it is not wrapped
// in materialize(); caching it would only add cache pressure.
let replicationLatency =
    filteredLogs
    // Grouping by Uri is very high-cardinality (one group per blob), so ask
    // the engine to shuffle the aggregation by that key.
    | summarize hint.shufflekey = Uri
        Source_LastModifiedTime = maxif(LastModifiedTime, OperationName == "PutBlob"),
        Destination_LastModifiedTime = maxif(LastModifiedTime, OperationName == "CopyBlob")
        by prefix, Uri
    // Null when either side has no matching log row for this blob
    | extend ReplicationLatency = datetime_diff('second', Destination_LastModifiedTime, Source_LastModifiedTime)
    | summarize
        Max_ReplicationLatency = max(ReplicationLatency),
        Min_ReplicationLatency = min(ReplicationLatency),
        Avg_ReplicationLatency = avg(ReplicationLatency)
        by prefix;

// Step 3: Summarize logs by prefix
// Produces one row per prefix with time bounds, per-operation LastModifiedTime
// bounds, ingress volume, distinct blob counts and the account names seen for
// PutBlob (source) and CopyBlob (destination).
// The result is consumed only once (by the final join), so it is not wrapped
// in materialize().
let logSummarization =
    filteredLogs
    | summarize
        TimeGenerated = min(TimeGenerated),
        Max_TimeGenerated = max(TimeGenerated),
        Min_LastModifiedTime = min(LastModifiedTime),
        Max_LastModifiedTime = max(LastModifiedTime),
        Source_MinLastModifiedTime = minif(LastModifiedTime, OperationName == "PutBlob"),
        Source_MaxLastModifiedTime = maxif(LastModifiedTime, OperationName == "PutBlob"),
        Destination_MinLastModifiedTime = minif(LastModifiedTime, OperationName == "CopyBlob"),
        Destination_MaxLastModifiedTime = maxif(LastModifiedTime, OperationName == "CopyBlob"),
        // Divide by a real literal: dividing the integer byte total by integer
        // 1024 three times is integer division and truncates to whole GB.
        PutBlob_Ingress_Gb = sumif(RequestBodySize, OperationName == "PutBlob") / 1024.0 / 1024 / 1024,
        CopyBlob_Ingress_Gb = sumif(RequestBodySize, OperationName == "CopyBlob") / 1024.0 / 1024 / 1024,
        // dcountif() returns an estimated distinct count
        Source_FileCount = dcountif(Uri, OperationName == "PutBlob"),
        Destination_FileCount = dcountif(Uri, OperationName == "CopyBlob"),
        Src_AccountName = maxif(AccountName, OperationName == "PutBlob"),
        Dest_AccountName = maxif(AccountName, OperationName == "CopyBlob")
        by prefix;

// Step 4: Combine the per-prefix latency statistics with the per-prefix log
// summary, derive the remaining latency / count columns, and emit the report
// sorted by TimeGenerated, descending.
replicationLatency
| join kind=inner (logSummarization) on $left.prefix == $right.prefix
| extend
    // Seconds between the earliest source write and the earliest destination write
    StartOff_Latency = datetime_diff('second', Destination_MinLastModifiedTime, Source_MinLastModifiedTime),
    // Blobs seen on the source side minus blobs seen on the destination side
    FileDifference = Source_FileCount - Destination_FileCount,
    // Seconds between the latest source write and the latest destination write
    Replication_Latency_seconds = datetime_diff('second', Destination_MaxLastModifiedTime, Source_MaxLastModifiedTime)
| project
    prefix,
    TimeGenerated,
    Src_AccountName,
    Dest_AccountName,
    Source_MinLastModifiedTime,
    Source_MaxLastModifiedTime,
    Destination_MinLastModifiedTime,
    Destination_MaxLastModifiedTime,
    StartOff_Latency,
    PutBlob_Ingress_Gb,
    CopyBlob_Ingress_Gb,
    Source_FileCount,
    Destination_FileCount,
    FileDifference,
    Replication_Latency_seconds,
    Max_ReplicationLatency,
    Min_ReplicationLatency,
    Avg_ReplicationLatency
| sort by TimeGenerated desc
Posted

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900