Skip to content

Instantly share code, notes, and snippets.

@mklement0
Last active June 8, 2023 01:37
Show Gist options
  • Save mklement0/e8cabb620342af37ae7d0faecba7d588 to your computer and use it in GitHub Desktop.
Save mklement0/e8cabb620342af37ae7d0faecba7d588 to your computer and use it in GitHub Desktop.
PowerShell benchmarks for specific Stack Overflow questions
# Benchmarks for the answers at https://stackoverflow.com/q/75174855/45375.

# Number of sample data rows to generate (use multiples of 10).
$totalRowCount = 1000

# Number of runs to average.
# Note: With values above 15 you'll start to see the effects of on-demand compilation.
$runCount = 10

# Define the benchmark function `Time-Command` on demand by downloading its
# source code from a Gist; the user is asked for confirmation first.
# To be safe, inspect the source code at the specified URL before agreeing.
if (-not (Get-Command Time-Command -ErrorAction Ignore)) {
  $gistUrl = 'https://gist.github.com/mklement0/9e1f13978620b09ab2d15da5535d1b27/raw/Time-Command.ps1'
  $answer = Read-Host "`n====`n OK to download and define benchmark function ``Time-Command```n from Gist ${gistUrl}?`n=====`n(y/n)?"
  if ($answer.Trim() -notin 'y', 'yes') {
    Write-Warning 'Aborted.'
    exit 2
  }
  # Evaluate the downloaded source code; the warning stream (3) is silenced.
  Invoke-RestMethod $gistUrl | Invoke-Expression 3>$null
  # Abort if the function is still not defined afterwards.
  if (-not ${function:Time-Command}) { exit 2 }
}
# The 10 static sample rows (CSV data lines without the header line).
$rows = @(
  '"1011","01/16/2023","1/16/23 11:04 PM","1/17/23 6:52 AM"'
  '"1012","01/16/2023","1/16/23 11:18 PM","1/17/23 6:05 AM"'
  '"1012","01/17/2023","1/17/23 10:49 PM","1/18/23 7:26 AM"'
  '"1021","01/16/2023","1/16/23 11:18 PM","1/17/23 6:04 AM"'
  '"1021","01/17/2023","1/17/23 10:46 PM","1/18/23 8:12 AM"'
  '"10261","01/16/2023","1/16/23 6:02 AM","1/16/23 12:01 PM"'
  '"10262","01/18/2023","1/18/23 6:03 AM","1/18/23 12:02 PM"'
  '"10263","01/18/2023","1/18/23 11:04 AM","1/19/23 00:03 AM"'
  '"10262","01/19/2023","1/19/23 11:05 PM","1/20/23 07:03 AM"'
  '"1011","01/19/2023","1/19/23 2:06 PM","1/19/23 11:04 PM"'
)

# Multiplication factor for the static array of sample rows.
# Note: The total row count won't equal $totalRowCount exactly unless the latter
# is a multiple of 10.
$multiplier = $totalRowCount / 10

# Create a temporary file and write the CSV header line followed by the
# repeated sample rows to it...
$tempFile = New-TemporaryFile
$headerLine = '"Employee Id","Work Date","In","Out"'
Set-Content $tempFile.FullName -Value $headerLine, ($rows * $multiplier)

# ... and warm up the cache by reading the file once, discarding the content.
[void] (Get-Content -Raw $tempFile)
# Define script blocks with the commands to time.
$commands = @(
{ # @mklement0, from https://stackoverflow.com/a/75176143/45375
# Counts, per calendar day and hour of the day, how many input rows have an
# in/out time span that covers that hour, and outputs the counts as CSV.
# Ordered map from calendar day to that day's 24-element array of per-hour counts.
$hourMap = [ordered] @{}
Import-Csv $tempFile |
ForEach-Object {
# Get the in and out timestamps as [datetime] instances,
# reset to the start of the hour.
$in, $out =
($_.In, $_.Out).ForEach({ Get-Date $_ -Minute 0 -Second 0 -Millisecond 0 })
# Loop over all hours in the time between in and out.
# Both the in hour and the out hour are included (-le).
$timestamp = $in
while ($timestamp -le $out) {
# For the timestamp's calendar day, create a 24-element array
# representing the hours of the day; a given element's value will
# receive the count of clocked-in employees for that hour.
if (-not $hourMap.Contains($timestamp.Date)) {
$hourMap[$timestamp.Date] = [int[]]::new(24)
}
# Count this row for the current hour, then move on to the next hour.
$hourMap[$timestamp.Date][$timestamp.Hour]++
$timestamp = $timestamp.AddHours(1)
}
}
# Loop over all $hourMap entries, each representing a calendar day
# Each entry yields 24 output objects, one per hour, including hours with a count of 0.
$hourMap.GetEnumerator() |
ForEach-Object {
foreach ($hour in 0..23) { # For each calendar day, loop over all hours.
[pscustomobject] @{
WorkDate = $_.Key.ToString('d')
Hour = $hour
Count = $_.Value[$hour]
}
}
} |
ConvertTo-Csv
}
{ # @jdweng, from https://stackoverflow.com/a/75176209/45375
# Builds a table with one row per hour between the earliest clock-in and the
# latest clock-out, each row holding the hour and the number of input rows
# whose in/out span covers it.
$table = Import-Csv $tempFile
# Attach parsed [datetime] values to each input row as InDate / OutDate properties.
foreach($row in $table)
{
$startDate = ([System.DateTime]::ParseExact($row.In,"M/d/yy h:mm tt",$null))
#for count to work properly you need to truncate minutes
$startDate = $startDate.AddMinutes(-$startDate.Minute)
$row | Add-Member -NotePropertyName InDate -NotePropertyValue $startDate
# Note: unlike InDate, OutDate keeps its minutes.
$row | Add-Member -NotePropertyName OutDate -NotePropertyValue ([System.DateTime]::ParseExact($row.Out,"M/d/yy h:mm tt",$null))
}
# Determine the overall time range covered by the input.
$minDate = $table | Measure-Object -Property InDate -Minimum
$maxDate = $table | Measure-Object -Property OutDate -Maximum
#subtract minutes to get hour
$minDate = $minDate.minimum.AddMinutes(-$minDate.minimum.Minute)
$maxDate = $maxDate.maximum.AddMinutes(-$maxDate.maximum.Minute)
$out_table = [System.Collections.ArrayList]::new()
# Step through the range hour by hour; for every hour the entire table is
# filtered again to find the rows whose span includes that hour.
for($date = $minDate; $date -le $maxDate; $date = $date.AddHours(1))
{
$numEmployees = @($table | Where-Object { ($_.InDate -le $date) -and ($_.OutDate -ge $date)})
$newRow = New-Object -TypeName psobject
$newRow | Add-Member -NotePropertyName Hour -NotePropertyValue $date
$newRow | Add-Member -NotePropertyName Count -NotePropertyValue $numEmployees.Count
# ArrayList.Add() returns the new element's index; discard it.
$out_table.Add($newRow) | Out-Null
}
# Output the resulting table.
$out_table
}
)
Write-Verbose -Verbose "Running benchmarks with $totalRowCount sample rows, averaging $runCount runs..."
try {
  # Time the commands, averaging $runCount runs each.
  # Add -OutputToHost to print script-block output
  Time-Command -Count $runCount $commands
}
finally {
  # Clean up: remove the temporary input file even if the benchmark run fails
  # or is interrupted, so that no temporary file is left behind.
  $tempFile | Remove-Item
}
# Benchmarks for the solutions at https://stackoverflow.com/q/75347681/45375

# Number of sample data lines to generate (use multiples of 10).
$totalLineCount = 10000

# Number of test runs to average.
# Note: With values above 15 you'll start to see the effects of on-demand compilation.
$runCount = 10

# Define the benchmark function `Time-Command` on demand by downloading its
# source code from a Gist; the user is asked for confirmation first.
# To be safe, inspect the source code at the specified URL before agreeing.
if (-not (Get-Command Time-Command -ErrorAction Ignore)) {
  $gistUrl = 'https://gist.github.com/mklement0/9e1f13978620b09ab2d15da5535d1b27/raw/Time-Command.ps1'
  $answer = Read-Host "`n====`n OK to download and define benchmark function ``Time-Command```n from Gist ${gistUrl}?`n=====`n(y/n)?"
  if ($answer.Trim() -notin 'y', 'yes') {
    Write-Warning 'Aborted.'
    exit 2
  }
  # Evaluate the downloaded source code; the warning stream (3) is silenced.
  Invoke-RestMethod $gistUrl | Invoke-Expression 3>$null
  # Abort if the function is still not defined afterwards.
  if (-not ${function:Time-Command}) { exit 2 }
}
# 10 sample lines (5 pairs of lines), split into an array of individual lines.
# NOTE(review): the `-replace`-based commands below only join a pair of lines if
# the second line starts with whitespace; the second lines as shown here have no
# leading whitespace - verify against the original sample data.
$tenLines = @'
ALPHA-FETOPROTEIN ROUTINE CH 0203 001 02/03/2023@10:45 LIVERF3
###-##-#### #######,#### In lab
ALPHA-FETOPROTEIN ROUTINE CH 0203 234 02/03/2023@11:05 LIVER
###-##-#### ########,######## In lab
ANION GAP STAT CH 0203 124 02/03/2023@11:06 DAY
###-##-#### ######,##### #### In lab
BASIC METABOLIC PANE ROUTINE CH 0203 001 02/03/2023@10:45 LIVERF3
###-##-#### #######,#### ###### In lab
BASIC METABOLIC PANE ROUTINE CH 0204 002 02/03/2023@10:45 LIVERF4
###-##-#### #######,#### ###### In lab
'@ -split '\r?\n'

# Create the sample input file and fill it with the requested number of lines
# by repeating the 10 sample lines.
$tempInFile = New-TemporaryFile
$repeatCount = $totalLineCount / 10
Set-Content $tempInFile.FullName -Value (, $tenLines * $repeatCount)

# Create a temporary file that serves as the output file for the commands.
$tempOutFile = New-TemporaryFile

# Warm up the cache by reading the input file once, discarding the content.
[void] (Get-Content -Raw $tempInFile)
# Define the solutions to compare, as an array of script blocks.
$commands = @(
{ # switch statement, in pipeline (streaming)
# Reads the input file line by line and joins each pair of lines into one
# output line, streaming the joined lines through the pipeline to Set-Content.
& {
# 1-based line counter.
$i = 1
switch -File $tempInFile {
default {
# Odd-numbered line: remember it. Even-numbered line: output the remembered
# line, a space, and the current line without its leading whitespace.
if ($i++ % 2) { $firstLineInPair = $_ }
else { $firstLineInPair + ' ' + $_.TrimStart() }
}
}
} | Set-Content $tempOutFile
}
{ # switch statement, no pipeline (collect all output lines first)
# Same line-pairing logic as the streaming variant, but $(...) collects all
# joined lines first and passes them to Set-Content via -Value.
Set-Content $tempOutFile -Value $(
$i = 1
switch -File $tempInFile {
default {
if ($i++ % 2) { $firstLineInPair = $_ }
else { $firstLineInPair + ' ' + $_.TrimStart() }
}
})
}
{ # -replace, in pipeline (streaming)
# Reads the whole file into a single string (-Raw) and joins line pairs with one
# regex replacement: $1 is the first line, $2 is the second line including its
# trailing newline; the newline between them and the second line's leading
# whitespace are replaced with a single space.
# Note: \s+ requires at least one whitespace character at the start of the second line.
(Get-Content -Raw $tempInFile) -replace '(.+)\r?\n(?:\s+)(.+\r?\n)', '$1 $2' |
Set-Content $tempOutFile
}
{ # -replace, no pipeline (collect all output lines first)
# Same replacement, but the result is passed to Set-Content via -Value
# instead of via the pipeline.
Set-Content $tempOutFile -Value ((Get-Content -Raw $tempInFile) -replace '(.+)\r?\n(?:\s+)(.+\r?\n)', '$1 $2')
}
{ # Get-Content -ReadCount 2, in pipeline, (streaming)
# -ReadCount 2 makes Get-Content emit the lines in batches of two, so that $_
# holds a pair: $_[0] is the first line, $_[1] the second one, whose leading
# whitespace is removed before joining.
# NOTE(review): presumably assumes an even number of input lines; with an odd
# count the last batch would have no $_[1] - confirm.
Get-Content -ReadCount 2 $tempInFile |
ForEach-Object { $_[0] + ' ' + $_[1].TrimStart() } |
Set-Content $tempOutFile
}
{ # Get-Content -ReadCount 2, no pipeline (collect all output lines first)
# Same pairing logic, but all batches are collected first and processed with
# the .ForEach() method; the result is passed to Set-Content via -Value.
Set-Content $tempOutFile -Value (
(Get-Content -ReadCount 2 $tempInFile).ForEach({ $_[0] + ' ' + $_[1].TrimStart() })
)
}
{ # .NET APIs (StreamReader, StreamWriter)
  # Joins each pair of input lines into one output line, reading from
  # $tempInFile and writing to $tempOutFile with .NET stream APIs.
  $reader = $writer = $null
  try {
    $reader = [System.IO.StreamReader]::new($tempInFile)
    $writer = [System.IO.StreamWriter]::new($tempOutFile)
    # ReadLine() returns $null once the end of the file is reached; $null is
    # placed on the LHS so that the test is always a scalar null comparison.
    while ($null -ne ($line = $reader.ReadLine())) {
      # If there is no second line (odd number of lines), the first line is
      # paired with an empty string.
      $secondLine = ""
      if (!$reader.EndOfStream) { $secondLine = $reader.ReadLine() }
      $writer.WriteLine($line + ' ' + $secondLine.TrimStart())
    }
  }
  finally {
    # Always release the file handles, even if reading or writing fails;
    # otherwise the files could stay locked for later runs and for the cleanup.
    if ($null -ne $reader) { $reader.Dispose() }
    if ($null -ne $writer) { $writer.Dispose() }
  }
}
)
# Run the benchmarks.
Write-Verbose -Verbose "Running benchmarks ($($totalLineCount.ToString('N0')) input lines, averaged over $runCount runs)..."
try {
  # Time the commands, averaging $runCount runs each.
  # Add -OutputToHost to print script-block output
  Time-Command -Count $runCount $commands
}
finally {
  # Clean up: remove both temporary files even if the benchmark run fails
  # or is interrupted, so that no temporary files are left behind.
  $tempInFile, $tempOutFile | Remove-Item
}
# Benchmarks for a specific answer at https://stackoverflow.com/q/76011142/45375
# This Gist compares the performance of:
# * @jdweng's solution at https://stackoverflow.com/a/76011282/45375
# * with a PowerShell-idiomatic equivalent.
#
# On a Windows 11 22H2 machine running Windows PowerShell 5.1,
# the PowerShell-idiomatic solution is about 50(!) times faster, as
# the following sample output shows:
# VERBOSE: Running benchmarks with 1000 input lines...
#
# Factor Secs (15-run avg.) Command
# ------ ------------------ -------
# 1.00 0.013 # The equivalent PowerShell-idiomatic solution...
# 48.68 0.635 # @jdweng's solution at https://stackoverflow.com/a/76011282/45375...
# Define the benchmark function `Time-Command` on demand by downloading its
# source code from a Gist; the user is asked for confirmation first.
# To be safe, inspect the source code at the specified URL before agreeing.
if (-not (Get-Command Time-Command -ErrorAction Ignore)) {
  $gistUrl = 'https://gist.github.com/mklement0/9e1f13978620b09ab2d15da5535d1b27/raw/Time-Command.ps1'
  $answer = Read-Host "`n====`n OK to download and define benchmark function ``Time-Command```n from Gist ${gistUrl}?`n=====`n(y/n)?"
  if ($answer.Trim() -notin 'y', 'yes') {
    Write-Warning 'Aborted.'
    exit 2
  }
  # Evaluate the downloaded source code; the warning stream (3) is silenced.
  Invoke-RestMethod $gistUrl | Invoke-Expression 3>$null
  # Abort if the function is still not defined afterwards.
  if (-not ${function:Time-Command}) { exit 2 }
}

# Construct the input data:
# The number of input lines (strings) to generate.
$count = 1e3

# An array containing $count copies of the sample string.
$inputStrings = @('a.b.c.d=rest') * $count

# Receives the output list of each solution, so that the two results can be
# compared after the benchmark run.
$results = [ordered] @{ List1 = $null; List2 = $null }
# The solutions to compare, specified as script blocks ({ ... })
$commandsToCompare = @(
{ # @jdweng's solution at https://stackoverflow.com/a/76011282/45375
# Converts each input string that contains '=' into an object with four
# properties, collecting the objects in an ArrayList that is also stored in
# $results.List1.
$results.List1 = $table = [System.Collections.ArrayList]::new()
foreach($row in $inputStrings)
{
if($row.Contains('='))
{
# The object is built incrementally: one Add-Member pipeline call per property.
$newRow = New-Object -TypeName psobject
$newRow | Add-Member -NotePropertyName column1 -NotePropertyValue 'abcdefg'
# Split into the part before and after '=', then split the former by '.'.
$splitRow = $row.Split('=')
$splitPeriod = $splitRow[0].Split('.')
$newRow | Add-Member -NotePropertyName column2 -NotePropertyValue $splitPeriod[0]
$newRow | Add-Member -NotePropertyName column3 -NotePropertyValue $splitPeriod[1]
$newRow | Add-Member -NotePropertyName column4 -NotePropertyValue $splitRow[1]
# ArrayList.Add() returns the new element's index; discard it.
$table.Add($newRow) | Out-Null
}
}
}
{ # The equivalent PowerShell-idiomatic solution
# Converts each input string that contains '=' into an object with four
# properties; the objects output by the foreach statement are assigned
# directly to $results.List2.
$results.List2 = foreach ($row in $inputStrings) {
if($row.Contains('=')) {
# Split into the part before and after '=', then split the former by '.'.
$splitRow = $row.Split('=')
$splitPeriod = $splitRow[0].Split('.')
# Construct and output the object in a single step.
[pscustomobject] @{
column1 = 'abcdefg'
column2 = $splitPeriod[0]
column3 = $splitPeriod[1]
column4 = $splitRow[1]
}
}
}
}
)
# Time the commands (15-run average).
Write-Verbose -Verbose "Running benchmarks with $count input lines..."
Time-Command $commandsToCompare

# Verify that both commands produced the same results by comparing the two
# result lists on all properties of the first list's first object.
$propertyNames = $results.List1[0].psobject.Properties.Name
$differences = Compare-Object $results.List1 $results.List2 -Property $propertyNames
if ($differences) {
  throw "Test commands unexpectedly yielded different results."
}
# Benchmarks for https://stackoverflow.com/q/76419365/45375

# Define the benchmark function `Time-Command` on demand by downloading its
# source code from a Gist; the user is asked for confirmation first.
# To be safe, inspect the source code at the specified URL before agreeing.
if (-not (Get-Command Time-Command -ErrorAction Ignore)) {
  $gistUrl = 'https://gist.github.com/mklement0/9e1f13978620b09ab2d15da5535d1b27/raw/Time-Command.ps1'
  $answer = Read-Host "`n====`n OK to download and define benchmark function ``Time-Command```n from Gist ${gistUrl}?`n=====`n(y/n)?"
  if ($answer.Trim() -notin 'y', 'yes') {
    Write-Warning 'Aborted.'
    exit 2
  }
  # Evaluate the downloaded source code; the warning stream (3) is silenced.
  Invoke-RestMethod $gistUrl | Invoke-Expression 3>$null
  # Abort if the function is still not defined afterwards.
  if (-not ${function:Time-Command}) { exit 2 }
}

# Number of sample data lines to use:
$totalLineCount = 1e6 # 1 million

# Create a temporary file and fill it with the requested number of - short -
# lines (the numbers 1 through $totalLineCount, one per line):
$tempFile = New-TemporaryFile
1..$totalLineCount > $tempFile

# Warm up the cache by reading the file once, discarding the content:
[void] (Get-Content -Raw $tempFile)
# Run the benchmarks, comparing four ways of counting the lines of the file:
# Note:
# * Averages 15 runs by default; change with -Count $n
# * $n > 16 may distort the results due to JIT compilation.
# * `wc` is an external utility rather than a PowerShell command; presumably it
#   is only available on Unix-like platforms - verify before running on Windows.
try {
  Time-Command {
    [System.Linq.Enumerable]::Count(
      [System.IO.File]::ReadLines((Convert-Path $tempFile))
    )
  }, {
    [System.IO.File]::ReadAllLines((Convert-Path $tempFile)).Length
  }, {
    wc -l $tempFile
  }, {
    $c = 0; switch -File $tempFile { default { ++$c } }; $c
  }
}
finally {
  # Clean up: remove the temporary file even if one of the commands fails or
  # the run is interrupted, so that no temporary file is left behind.
  Remove-Item $tempFile
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment