Merge remote-tracking branch 'marcelowt2/queuelat' into unstable/devel/v1.2
Merging queuelat into rt-tests Signed-off-by: John Kacur <jkacur@redhat.com>
This commit is contained in:
commit
62d2e8b40c
7
Makefile
7
Makefile
|
@ -16,7 +16,8 @@ sources = cyclictest.c \
|
|||
sigwaittest.c \
|
||||
svsematest.c \
|
||||
cyclicdeadline.c \
|
||||
deadline_test.c
|
||||
deadline_test.c \
|
||||
queuelat.c
|
||||
|
||||
TARGETS = $(sources:.c=)
|
||||
LIBS = -lrt -lpthread
|
||||
|
@ -86,6 +87,7 @@ VPATH += src/backfire:
|
|||
VPATH += src/lib:
|
||||
VPATH += src/hackbench:
|
||||
VPATH += src/sched_deadline:
|
||||
VPATH += src/queuelat:
|
||||
|
||||
$(OBJDIR)/%.o: %.c | $(OBJDIR)
|
||||
$(CC) -D VERSION=$(VERSION) -c $< $(CFLAGS) $(CPPFLAGS) -o $@
|
||||
|
@ -146,6 +148,9 @@ pip_stress: $(OBJDIR)/pip_stress.o $(OBJDIR)/librttest.a
|
|||
hackbench: $(OBJDIR)/hackbench.o
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS)
|
||||
|
||||
queuelat: $(OBJDIR)/queuelat.o $(OBJDIR)/librttest.a
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) $(RTTESTLIB)
|
||||
|
||||
LIBOBJS =$(addprefix $(OBJDIR)/,error.o rt-get_cpu.o rt-sched.o rt-utils.o)
|
||||
$(OBJDIR)/librttest.a: $(LIBOBJS)
|
||||
$(AR) rcs $@ $^
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
# Standalone build of queuelat (outside the top-level rt-tests Makefile).
queuelat: queuelat.o
	cc -o queuelat queuelat.o

queuelat.o: queuelat.c
	cc -c -Wall queuelat.c

# "clean" is not a file; -f keeps it from failing when nothing was built yet.
.PHONY: clean
clean:
	rm -f queuelat queuelat.o
|
||||
|
|
@ -0,0 +1,142 @@
|
|||
|
||||
cyclictest does not catch all cases where packet forwarding
|
||||
latency can exceed a given threshold.
|
||||
|
||||
Example:
|
||||
|
||||
# taskset -c 3 ./queuelat -m 20000 -c 100 -p 13 -f `sh ./get_cpuinfo_mhz.sh`
|
||||
|
||||
# rmmod targeted-ipi; insmod ./targeted-ipi.ko ipidest=3 nripis=200 interval=2 delay=10
|
||||
|
||||
<...>-4566 [003] .....11 4474.559880: tracing_mark_write: memmove block queue_size=28 queue_dec=279 queue_inc=307 delta=23685 ns
|
||||
<...>-4566 [003] .....11 4474.559912: tracing_mark_write: memmove block queue_size=63 queue_dec=279 queue_inc=314 delta=24198 ns
|
||||
<...>-4566 [003] .....11 4474.559937: tracing_mark_write: memmove block queue_size=97 queue_dec=279 queue_inc=313 delta=24090 ns
|
||||
<...>-4566 [003] .....11 4474.559965: tracing_mark_write: memmove block queue_size=130 queue_dec=279 queue_inc=312 delta=24048 ns
|
||||
<...>-4566 [003] .....11 4474.559993: tracing_mark_write: memmove block queue_size=162 queue_dec=279 queue_inc=311 delta=23957 ns
|
||||
<...>-4566 [003] .....11 4474.560018: tracing_mark_write: memmove block queue_size=193 queue_dec=279 queue_inc=310 delta=23912 ns
|
||||
<...>-4566 [003] .....11 4474.560046: tracing_mark_write: memmove block queue_size=225 queue_dec=279 queue_inc=311 delta=23965 ns
|
||||
<...>-4566 [003] .....11 4474.560074: tracing_mark_write: memmove block queue_size=257 queue_dec=279 queue_inc=311 delta=23971 ns
|
||||
<...>-4566 [003] .....11 4474.560102: tracing_mark_write: memmove block queue_size=288 queue_dec=279 queue_inc=310 delta=23902 ns
|
||||
<...>-4566 [003] .....11 4474.560127: tracing_mark_write: memmove block queue_size=320 queue_dec=279 queue_inc=311 delta=23945 ns
|
||||
<...>-4566 [003] .....11 4474.560155: tracing_mark_write: memmove block queue_size=351 queue_dec=279 queue_inc=310 delta=23921 ns
|
||||
<...>-4566 [003] .....11 4474.560180: tracing_mark_write: memmove block queue_size=381 queue_dec=279 queue_inc=309 delta=23839 ns
|
||||
<...>-4566 [003] .....11 4474.560208: tracing_mark_write: memmove block queue_size=412 queue_dec=279 queue_inc=310 delta=23876 ns
|
||||
<...>-4566 [003] .....11 4474.560236: tracing_mark_write: memmove block queue_size=443 queue_dec=279 queue_inc=310 delta=23886 ns
|
||||
<...>-4566 [003] .....11 4474.560261: tracing_mark_write: memmove block queue_size=474 queue_dec=279 queue_inc=310 delta=23901 ns
|
||||
<...>-4566 [003] .....11 4474.560288: tracing_mark_write: memmove block queue_size=505 queue_dec=279 queue_inc=310 delta=23891 ns
|
||||
<...>-4566 [003] .....11 4474.560316: tracing_mark_write: memmove block queue_size=535 queue_dec=279 queue_inc=309 delta=23822 ns
|
||||
<...>-4566 [003] .....11 4474.560341: tracing_mark_write: memmove block queue_size=565 queue_dec=279 queue_inc=309 delta=23815 ns
|
||||
<...>-4566 [003] .....11 4474.560353: tracing_mark_write: queue length exceeded: queue_size=565 max_queue_len=559
|
||||
|
||||
# taskset -c 3 cyclictest -m -n -q -p95 -D 60m -h60 -i 200
|
||||
|
||||
# rmmod targeted-ipi; insmod ./targeted-ipi.ko ipidest=3 nripis=20000 interval=2 delay=10
|
||||
|
||||
Cyclictest results:
|
||||
|
||||
# Total: 000068099
|
||||
# Min Latencies: 00001
|
||||
# Avg Latencies: 00002
|
||||
# Max Latencies: 00008
|
||||
# Histogram Overflows: 00000
|
||||
|
||||
|
||||
----- queuelat basics:
|
||||
|
||||
Queuelat simulates a DPDK queue. From queuelat.c:
|
||||
|
||||
Program parameters:
|
||||
maxlatency (-m): maximum latency allowed, in nanoseconds (int).
|
||||
cycles_per_packet: number of cycles to process one packet (int).
|
||||
mpps(million-packet-per-sec): million packets per second (float).
|
||||
tsc_freq_mhz: TSC frequency in MHz, as measured by TSC PIT calibration
|
||||
(search for "Detected XXX MHz processor" in dmesg, and use the integer part).
|
||||
|
||||
How it works
|
||||
============
|
||||
|
||||
The program in essence does:
|
||||
|
||||
b = rdtsc();
|
||||
memmove(dest, src, n);
|
||||
a = rdtsc();
|
||||
|
||||
delay = convert_to_ns(a - b);
|
||||
|
||||
queue_size += packets_queued_in(delay);
|
||||
queue_size -= packets_processed;
|
||||
|
||||
if (queue_size > max_queue_len)
|
||||
FAIL();
|
||||
|
||||
packets_processed is fixed, and is estimated as follows:
|
||||
n is determined first, so that the stats bucket with highest count
|
||||
takes max_latency/2.
|
||||
for max_latency/2, we calculate how many packets can be drained
|
||||
in that time (using cycles_per_packet).
|
||||
|
||||
Queuelat output
|
||||
===============
|
||||
|
||||
During calibration, queuelat outputs the following table:
|
||||
|
||||
[9600 - 9699] = 0 packetfillrates=[67 - 67]
|
||||
[9700 - 9799] = 7907 packetfillrates=[67 - 68]
|
||||
[9800 - 9899] = 42085 packetfillrates=[68 - 69]
|
||||
[9900 - 9999] = 7 packetfillrates=[69 - 69]
|
||||
[10000 - 10099] = 1 packetfillrates=[70 - 70]
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |_________ [min - max] number of
|
||||
| | | packets the queue will reach
|
||||
| | | with specified mpps in this
|
||||
| | | time (without draining)
|
||||
| | |
|
||||
| | |______________________________ number of hits for this
|
||||
| | bucket
|
||||
| |
|
||||
| |______________________________________ max amount of time (ns)
|
||||
| this bucket accepts
|
||||
|
|
||||
|______________________________________________ min amount of time (ns)
|
||||
this bucket accepts
|
||||
|
||||
|
||||
On success, queuelat outputs a table similar to cyclictest:
|
||||
|
||||
[7000 - 7099] = 0
|
||||
[7100 - 7199] = 2
|
||||
[7200 - 7299] = 2457
|
||||
[7300 - 7399] = 21058
|
||||
| | |
|
||||
| | |___________ Number of processing loops that hit this
|
||||
| | bucket.
|
||||
| |
|
||||
| |____________________ Maximum number of nanoseconds of this bucket.
|
||||
|
|
||||
|
|
||||
|___________________________ Minimum number of nanoseconds in this bucket.
|
||||
|
||||
That is, a processing loop is accounted to a bucket if its duration
|
||||
is
|
||||
|
||||
min_number_ns_in_bucket <= duration <= max_number_ns_in_bucket
|
||||
|
||||
|
||||
Automatic determination of Mpps
|
||||
===============================
|
||||
|
||||
There is a script called determine_maximum_mpps.sh, which should be edited
|
||||
to include the pinning and -RT priority configuration for your machine.
|
||||
|
||||
PREAMBLE="taskset -c 2 chrt -f 1"
|
||||
MAXLAT="20000"
|
||||
CYCLES_PER_PACKET="300"
|
||||
|
||||
This script will find the maximum mpps parameter which can sustain:
|
||||
|
||||
1) 10 consecutive 30 second runs.
|
||||
2) 1 run of 10 minutes.
|
||||
|
||||
Without violating the latency specified with $MAXLAT.
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
#!/bin/bash

# A script to determine the maximum mpps the machine can sustain. Logic:
#   1. Raise mpps in steps of 3 until the first failing run.
#   2. Lower mpps in steps of 1 until a run succeeds.
#   3. Raise mpps in steps of 0.3 until a run fails.
#   4. Lower mpps in steps of 0.1 until a run succeeds.
#   5. Confirm with 10 runs of 30 seconds and then one 10 minute run,
#      lowering mpps by 0.1 after every failure.
#
# NOTE: please set "PREAMBLE" to the command line prefix (CPU pinning and
# real-time priority) you use to run queuelat on your machine.
#
PREAMBLE="taskset -c 2 chrt -f 1"
MAXLAT="20000"
CYCLES_PER_PACKET="300"

# A single scratch file receives queuelat's output; it is removed on exit
# instead of leaving one temporary file behind per run.
outfile=`mktemp` || exit 1
trap 'rm -f "$outfile"' EXIT

# run_exceeded MPPS SECONDS
# Run queuelat once at MPPS for SECONDS. Returns 0 (true) when its output
# contains "exceeded", i.e. the run reported a queue length violation.
run_exceeded() {
	$PREAMBLE ./queuelat -m $MAXLAT -c $CYCLES_PER_PACKET -f `sh ./get_cpuinfo_mhz.sh` -p $1 -t $2 > "$outfile"
	grep -q exceeded "$outfile"
}

echo "Determining maximum mpps the machine can handle"
echo "Will take a few minutes to determine mpps value"
echo "And 10 minutes run to confirm the final mpps value is stable"

# Step 1: coarse upward search.
for mpps in `seq 3 3 50`; do
	echo testing $mpps Mpps

	if run_exceeded $mpps 30; then
		echo mpps failed: $mpps
		break
	fi
	echo success
done
echo first loop mpps: $mpps

# Step 2: walk back down in whole units until a run passes.
first_mpps=$(($mpps - 1))
for mpps in `seq $first_mpps -1 3`; do
	echo testing $mpps Mpps

	if ! run_exceeded $mpps 30; then
		echo mpps success $mpps
		break
	fi
	echo failure
done

second_mpps=`echo "$mpps + 0.3" | bc`
echo second loop mpps: $mpps

# Step 3: finer upward search.
for mpps in `seq $second_mpps 0.3 $first_mpps`; do
	echo testing $mpps Mpps

	if run_exceeded $mpps 30; then
		echo mpps failure $mpps
		break
	fi
	echo success
done

echo third loop mpps: $mpps
third_mpps=`echo "$mpps -0.1" | bc`

# Step 4: finest downward search.
for mpps in `seq $third_mpps -0.1 3`; do
	echo testing $mpps Mpps

	if ! run_exceeded $mpps 30; then
		echo mpps success $mpps
		break
	fi
	echo failure
done

# Step 5a: the candidate must survive 10 consecutive 30 second runs.
queuelat_failure=1
while [ $queuelat_failure == 1 ]; do

	queuelat_failure=0

	echo -n "Starting 10 runs of 30 seconds with "
	echo "$mpps Mpps"

	for i in `seq 1 10`; do
		if run_exceeded $mpps 30; then
			echo "mpps failure (run $i) $mpps"
			queuelat_failure=1
			mpps=`echo $mpps - 0.1 | bc`
			break
		fi
		echo "run $i success"
	done

done

# Step 5b: the candidate must survive one 10 minute run.
queuelat_failure=1
while [ $queuelat_failure == 1 ]; do

	queuelat_failure=0

	echo -n "Starting 10 minutes run with "
	echo "$mpps Mpps"

	if run_exceeded $mpps 600; then
		echo "mpps failure (10 minute run) $mpps"
		queuelat_failure=1
		mpps=`echo $mpps - 0.1 | bc`
		continue
	fi
	echo "10 minute run success"
done

echo Final mpps is: $mpps

unset queuelat_failure
unset mpps
|
||||
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
#!/bin/bash

# Print the "cpu MHz" value of the first CPU listed in /proc/cpuinfo.
# Only the first entry is used: when cores run at different frequencies,
# collapsing with uniq still leaves several values, and callers pass this
# output as the single argument of queuelat's -f option.
mhz=`grep -m 1 "cpu MHz" /proc/cpuinfo | cut -f 3 -d " "`
echo $mhz
|
||||
|
|
@ -0,0 +1,635 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
|
||||
#define NSEC_PER_SEC 1000000000
|
||||
|
||||
/* Program parameters:
|
||||
* maxlatency (-m): maximum latency allowed, in nanoseconds (int).
|
||||
* cycles_per_packet: number of cycles to process one packet (int).
|
||||
* mpps(million-packet-per-sec): million packets per second (float).
|
||||
* tsc_freq_mhz: TSC frequency in MHz, as measured by TSC PIT calibration
|
||||
* (search for "Detected XXX MHz processor" in dmesg, and use the integer part).
|
||||
*
|
||||
* How it works
|
||||
* ============
|
||||
*
|
||||
* The program in essence does:
|
||||
*
|
||||
* b = rdtsc();
|
||||
* memmove(dest, src, n);
|
||||
* a = rdtsc();
|
||||
*
|
||||
* delay = convert_to_ns(a - b);
|
||||
*
|
||||
* queue_size += packets_queued_in(delay);
|
||||
* queue_size -= packets_processed;
|
||||
*
|
||||
* if (queue_size > max_queue_len)
|
||||
* FAIL();
|
||||
*
|
||||
* packets_processed is fixed, and is estimated as follows:
|
||||
* n is determined first, so that the stats bucket with highest count
|
||||
* takes max_latency/2.
|
||||
* for max_latency/2, we calculate how many packets can be drained
|
||||
* in that time (using cycles_per_packet).
|
||||
*
|
||||
*/
|
||||
|
||||
int maxlatency;
|
||||
int cycles_per_packet;
|
||||
float mpps;
|
||||
int timeout_secs;
|
||||
int min_queue_size_to_print;
|
||||
|
||||
/* Derived constants */
|
||||
|
||||
float cycles_to_ns;
|
||||
int max_queue_len;
|
||||
|
||||
int default_n;
|
||||
int nr_packets_drain_per_block;
|
||||
|
||||
/*
|
||||
* Parameters for the stats collection buckets
|
||||
*/
|
||||
|
||||
/* Samples below LAST_VAL ns are spread over NR_BUCKETS buckets, each
 * VALS_PER_BUCKET ns wide. NR_BUCKETS is parenthesized so that it expands
 * as a single value inside larger expressions (e.g. x / NR_BUCKETS). */
#define LAST_VAL 70000
#define VALS_PER_BUCKET 100
#define NR_BUCKETS (LAST_VAL/VALS_PER_BUCKET)
|
||||
|
||||
unsigned long long int buckets[NR_BUCKETS+1];
|
||||
unsigned long long int total_count;
|
||||
|
||||
#define OUTLIER_BUCKET NR_BUCKETS
|
||||
|
||||
static int val_to_bucket(unsigned long long val)
|
||||
{
|
||||
int bucket_nr = val / VALS_PER_BUCKET;
|
||||
if (bucket_nr >= NR_BUCKETS) {
|
||||
return OUTLIER_BUCKET;
|
||||
}
|
||||
return bucket_nr;
|
||||
}
|
||||
|
||||
static void account(unsigned long long val)
|
||||
{
|
||||
int bucket_nr = val_to_bucket(val);
|
||||
buckets[bucket_nr]++;
|
||||
total_count++;
|
||||
}
|
||||
|
||||
static unsigned long long total_samples(void)
|
||||
{
|
||||
int i;
|
||||
unsigned long long total = 0;
|
||||
|
||||
for (i = 0; i <= OUTLIER_BUCKET; i++) {
|
||||
total += buckets[i];
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
static void print_all_buckets(void)
|
||||
{
|
||||
int i, print_dotdotdot = 0;
|
||||
|
||||
for (i = 0; i <= OUTLIER_BUCKET; i++) {
|
||||
int bucket_nr;
|
||||
unsigned long long val = i*VALS_PER_BUCKET;
|
||||
|
||||
bucket_nr = val_to_bucket(val);
|
||||
|
||||
if (bucket_nr != OUTLIER_BUCKET) {
|
||||
int n_bucketnr = bucket_nr+1;
|
||||
if (buckets[bucket_nr] == buckets[n_bucketnr]) {
|
||||
print_dotdotdot = 1;
|
||||
continue;
|
||||
}
|
||||
if (print_dotdotdot) {
|
||||
printf("...\n");
|
||||
print_dotdotdot = 0;
|
||||
}
|
||||
printf("[%lld - %lld] = %lld\n", val,
|
||||
val + VALS_PER_BUCKET-1,
|
||||
buckets[bucket_nr]);
|
||||
} else {
|
||||
if (print_dotdotdot) {
|
||||
printf("...\n");
|
||||
print_dotdotdot = 0;
|
||||
}
|
||||
printf("[%lld - END] = %lld\n", val,
|
||||
buckets[bucket_nr]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void print_max_bucketsec(void)
|
||||
{
|
||||
int i, bucket_nr;
|
||||
unsigned long long highest_val = 0;
|
||||
|
||||
for (i = 0; i <= OUTLIER_BUCKET; i++) {
|
||||
unsigned long long val = i*VALS_PER_BUCKET;
|
||||
|
||||
bucket_nr = val_to_bucket(val);
|
||||
|
||||
if (buckets[bucket_nr] != 0) {
|
||||
highest_val = val;
|
||||
}
|
||||
}
|
||||
|
||||
bucket_nr = val_to_bucket(highest_val);
|
||||
printf("Max loop processing time: [%lld - %lld] = %lld\n", highest_val,
|
||||
highest_val + VALS_PER_BUCKET-1,
|
||||
buckets[bucket_nr]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void print_min_bucketsec(void)
|
||||
{
|
||||
int i, bucket_nr;
|
||||
unsigned long long min_val = 0;
|
||||
|
||||
for (i = 0; i <= OUTLIER_BUCKET; i++) {
|
||||
unsigned long long val = i*VALS_PER_BUCKET;
|
||||
|
||||
bucket_nr = val_to_bucket(val);
|
||||
|
||||
if (buckets[bucket_nr] != 0) {
|
||||
min_val = val;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bucket_nr = val_to_bucket(min_val);
|
||||
printf("Min loop processing time: [%lld - %lld] = %lld\n", min_val,
|
||||
min_val + VALS_PER_BUCKET-1,
|
||||
buckets[bucket_nr]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void print_avg_bucketsec(void)
|
||||
{
|
||||
int i, bucket_nr;
|
||||
unsigned long long total_sum = 0;
|
||||
unsigned long long nr_hits = 0;
|
||||
|
||||
for (i = 0; i <= OUTLIER_BUCKET; i++) {
|
||||
unsigned long long val = i*VALS_PER_BUCKET;
|
||||
unsigned long long maxtime;
|
||||
|
||||
bucket_nr = val_to_bucket(val);
|
||||
|
||||
maxtime = val + VALS_PER_BUCKET-1;
|
||||
total_sum = total_sum + maxtime*buckets[bucket_nr];
|
||||
|
||||
nr_hits = nr_hits + buckets[bucket_nr];
|
||||
}
|
||||
|
||||
printf("Avg loop processing time: %lld\n", total_sum / nr_hits);
|
||||
}
|
||||
|
||||
static void print_all_buckets_drainlength(void)
|
||||
{
|
||||
int i, print_dotdotdot = 0;
|
||||
|
||||
for (i = 0; i <= OUTLIER_BUCKET; i++) {
|
||||
int bucket_nr;
|
||||
unsigned long long val = i*VALS_PER_BUCKET;
|
||||
|
||||
bucket_nr = val_to_bucket(val);
|
||||
|
||||
if (bucket_nr != OUTLIER_BUCKET) {
|
||||
unsigned long long mindelta, maxdelta;
|
||||
int nr_packets_minfill, nr_packets_maxfill;
|
||||
int n_bucketnr = bucket_nr+1;
|
||||
|
||||
if (buckets[bucket_nr] == buckets[n_bucketnr]) {
|
||||
print_dotdotdot = 1;
|
||||
continue;
|
||||
}
|
||||
if (print_dotdotdot) {
|
||||
printf("...\n");
|
||||
print_dotdotdot = 0;
|
||||
}
|
||||
|
||||
mindelta = val;
|
||||
maxdelta = val + VALS_PER_BUCKET-1;
|
||||
|
||||
nr_packets_minfill = mindelta * mpps * 1000000 / NSEC_PER_SEC;
|
||||
nr_packets_maxfill = maxdelta * mpps * 1000000 / NSEC_PER_SEC;
|
||||
|
||||
printf("[%lld - %lld] = %lld packetfillrates=[%d - %d]\n", val,
|
||||
val + VALS_PER_BUCKET-1,
|
||||
buckets[bucket_nr],
|
||||
nr_packets_minfill,
|
||||
nr_packets_maxfill);
|
||||
} else {
|
||||
if (print_dotdotdot) {
|
||||
printf("...\n");
|
||||
print_dotdotdot = 0;
|
||||
}
|
||||
printf("[%lld - END] = %lld\n", val,
|
||||
buckets[bucket_nr]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef unsigned long long cycles_t;
|
||||
typedef unsigned long long usecs_t;
|
||||
typedef unsigned long long u64;
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define DECLARE_ARGS(val, low, high) unsigned low, high
|
||||
#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32))
|
||||
#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high)
|
||||
#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
|
||||
#else
|
||||
#define DECLARE_ARGS(val, low, high) unsigned long long val
|
||||
#define EAX_EDX_VAL(val, low, high) (val)
|
||||
#define EAX_EDX_ARGS(val, low, high) "A" (val)
|
||||
#define EAX_EDX_RET(val, low, high) "=A" (val)
|
||||
#endif
|
||||
|
||||
static inline unsigned long long __rdtscll(void)
|
||||
{
|
||||
DECLARE_ARGS(val, low, high);
|
||||
|
||||
asm volatile("cpuid; rdtsc" : EAX_EDX_RET(val, low, high));
|
||||
|
||||
return EAX_EDX_VAL(val, low, high);
|
||||
}
|
||||
|
||||
#define rdtscll(val) do { (val) = __rdtscll(); } while (0)
|
||||
|
||||
static void init_buckets(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i <= NR_BUCKETS; i++)
|
||||
buckets[i] = 0;
|
||||
|
||||
total_count = 0;
|
||||
}
|
||||
|
||||
static int find_highest_count_bucket(void)
|
||||
{
|
||||
int i;
|
||||
int max_bucket = 0;
|
||||
unsigned long long int max_val = 0;
|
||||
|
||||
for (i=0; i <= NR_BUCKETS; i++) {
|
||||
if (buckets[i] > max_val) {
|
||||
max_bucket = i;
|
||||
max_val = buckets[i];
|
||||
}
|
||||
}
|
||||
|
||||
return max_bucket;
|
||||
}
|
||||
|
||||
int tracing_mark_fd;
|
||||
static void trace_open(void)
|
||||
{
|
||||
int fd;
|
||||
|
||||
fd = open("/sys/kernel/debug/tracing/trace_marker", O_RDWR);
|
||||
|
||||
if (fd == -1) {
|
||||
perror("open");
|
||||
exit(0);
|
||||
}
|
||||
tracing_mark_fd = fd;
|
||||
}
|
||||
|
||||
static void trace_write(char *buf, int len)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = write(tracing_mark_fd, buf, len);
|
||||
if (ret == -1) {
|
||||
perror("write");
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
static void run_n(int n)
|
||||
{
|
||||
u64 a, b;
|
||||
void *dest, *src;
|
||||
int i, delta, loops = 50000;
|
||||
|
||||
init_buckets();
|
||||
|
||||
dest = malloc(n);
|
||||
if (dest == NULL) {
|
||||
printf("failure to allocate %d bytes "
|
||||
" for dest\n", n);
|
||||
exit(0);
|
||||
}
|
||||
src = malloc(n);
|
||||
if (src == NULL) {
|
||||
printf("failure to allocate %d bytes "
|
||||
" for src\n", n);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
memset(src, 0, n);
|
||||
|
||||
memmove(dest, src, n);
|
||||
for (i = 0; i < loops; i++) {
|
||||
rdtscll(b);
|
||||
memmove(dest, src, n);
|
||||
rdtscll(a);
|
||||
delta = (a - b) * cycles_to_ns;
|
||||
account(delta);
|
||||
}
|
||||
|
||||
free(dest);
|
||||
free(src);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the size of n such that the stats for the
|
||||
* function call
|
||||
*
|
||||
* memmove(dest, src, n).
|
||||
*
|
||||
* Takes MaximumLat/2 in the bucket that has most
|
||||
* entries.
|
||||
*
|
||||
*/
|
||||
static int measure_n(void)
|
||||
{
|
||||
int time, bucket_nr;
|
||||
int n = 100000, delta = 0;
|
||||
|
||||
do {
|
||||
if (delta > 0)
|
||||
n = n+1000;
|
||||
else if (delta < 0)
|
||||
n = n-1000;
|
||||
|
||||
run_n(n);
|
||||
bucket_nr = find_highest_count_bucket();
|
||||
|
||||
time = bucket_nr * VALS_PER_BUCKET;
|
||||
|
||||
delta = maxlatency/2 - time;
|
||||
} while (abs(delta) > VALS_PER_BUCKET*2);
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
static void convert_to_ghz(double tsc_freq_mhz)
|
||||
{
|
||||
float tsc_freq_ghz = tsc_freq_mhz/1000;
|
||||
|
||||
cycles_to_ns = 1/tsc_freq_ghz;
|
||||
|
||||
printf("tsc_freq_ghz = %f, cycles_to_ns = %f\n", tsc_freq_ghz,
|
||||
cycles_to_ns);
|
||||
}
|
||||
|
||||
|
||||
/* Final report: the full histogram, a separator line, then the min, max
 * and average loop processing times. */
static void print_exit_info(void)
{
	print_all_buckets();

	printf("\n ---------------- \n");

	print_min_bucketsec();
	print_max_bucketsec();
	print_avg_bucketsec();
}
|
||||
|
||||
void main_loop(void)
|
||||
{
|
||||
u64 a, b;
|
||||
void *dest, *src;
|
||||
int delta;
|
||||
int queue_size = 0;
|
||||
|
||||
trace_open();
|
||||
|
||||
init_buckets();
|
||||
|
||||
dest = malloc(default_n);
|
||||
if (dest == NULL) {
|
||||
printf("failure to allocate %d bytes "
|
||||
" for dest\n", default_n);
|
||||
exit(0);
|
||||
}
|
||||
src = malloc(default_n);
|
||||
if (src == NULL) {
|
||||
printf("failure to allocate %d bytes "
|
||||
" for src\n", default_n);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
memset(src, 0, default_n);
|
||||
memmove(dest, src, default_n);
|
||||
|
||||
while (1) {
|
||||
char buf[500];
|
||||
int ret;
|
||||
int nr_packets_fill;
|
||||
|
||||
rdtscll(b);
|
||||
memmove(dest, src, default_n);
|
||||
rdtscll(a);
|
||||
delta = (a - b) * cycles_to_ns;
|
||||
account(delta);
|
||||
|
||||
/* fill up the queue by the amount of
|
||||
* time that passed */
|
||||
nr_packets_fill = delta * mpps * 1000000 / NSEC_PER_SEC;
|
||||
queue_size += nr_packets_fill;
|
||||
|
||||
/* decrease the queue by the amount of packets
|
||||
* processed in maxlatency/2 nanoseconds of
|
||||
* full processing.
|
||||
*/
|
||||
|
||||
queue_size -= nr_packets_drain_per_block;
|
||||
|
||||
if (queue_size < 0)
|
||||
queue_size = 0;
|
||||
|
||||
if (queue_size <= min_queue_size_to_print)
|
||||
continue;
|
||||
|
||||
ret = sprintf(buf, "memmove block queue_size=%d queue_dec=%d"
|
||||
" queue_inc=%d delta=%d ns\n", queue_size,
|
||||
nr_packets_drain_per_block,
|
||||
nr_packets_fill, delta);
|
||||
trace_write(buf, ret);
|
||||
|
||||
if (queue_size > max_queue_len) {
|
||||
printf("queue length exceeded: "
|
||||
" queue_size=%d max_queue_len=%d\n",
|
||||
queue_size, max_queue_len);
|
||||
ret = sprintf(buf, "queue length exceeded: "
|
||||
"queue_size=%d max_queue_len=%d\n",
|
||||
queue_size, max_queue_len);
|
||||
trace_write(buf, ret);
|
||||
print_exit_info();
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
free(dest);
|
||||
free(src);
|
||||
}
|
||||
|
||||
/*
 * Signal handler: print the collected statistics and terminate with
 * status 0.
 * NOTE(review): print_exit_info() uses printf(), which is not
 * async-signal-safe; confirm this is acceptable for this tool.
 */
void sig_handler(int sig)
{
	(void)sig;	/* the signal number is not used */

	print_exit_info();
	exit(0);
}
|
||||
|
||||
static void install_signals(void)
|
||||
{
|
||||
signal(SIGALRM, sig_handler);
|
||||
signal(SIGINT, sig_handler);
|
||||
}
|
||||
|
||||
int calculate_nr_packets_drain_per_block(void)
|
||||
{
|
||||
int maxcount;
|
||||
int i, time;
|
||||
int found = 0;
|
||||
int bucket_nr = find_highest_count_bucket();
|
||||
|
||||
maxcount = total_samples() / 40;
|
||||
|
||||
for (i = bucket_nr+1; i <= NR_BUCKETS; i++) {
|
||||
if (buckets[i] < maxcount) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found == 0) {
|
||||
printf("error, did not find right bucket with < 10%% of total\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
time = i*VALS_PER_BUCKET + VALS_PER_BUCKET-1;
|
||||
nr_packets_drain_per_block = time / (cycles_per_packet*cycles_to_ns);
|
||||
|
||||
return nr_packets_drain_per_block;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
double tsc_freq_mhz;
|
||||
float max_queue_len_f;
|
||||
char *mvalue = NULL;
|
||||
char *cvalue = NULL;
|
||||
char *pvalue = NULL;
|
||||
char *fvalue = NULL;
|
||||
char *tvalue = NULL;
|
||||
char *qvalue = NULL;
|
||||
int index;
|
||||
int c;
|
||||
|
||||
install_signals();
|
||||
|
||||
opterr = 0;
|
||||
|
||||
while ((c = getopt (argc, argv, "m:c:p:f:t:q:")) != -1)
|
||||
switch (c)
|
||||
{
|
||||
case 'm':
|
||||
mvalue = optarg;
|
||||
break;
|
||||
case 'c':
|
||||
cvalue = optarg;
|
||||
break;
|
||||
case 'p':
|
||||
pvalue = optarg;
|
||||
break;
|
||||
case 'f':
|
||||
fvalue = optarg;
|
||||
break;
|
||||
case 't':
|
||||
tvalue = optarg;
|
||||
break;
|
||||
case 'q':
|
||||
qvalue = optarg;
|
||||
break;
|
||||
case '?':
|
||||
if (optopt == 'm' || optopt == 'c' || optopt == 'p' ||
|
||||
optopt == 'f' || optopt == 't' || optopt == 'q')
|
||||
printf ("Option -%c requires an argument.\n", optopt);
|
||||
else if (isprint (optopt))
|
||||
printf ("Unknown option `-%c'.\n", optopt);
|
||||
else
|
||||
printf ( "Unknown option character `\\x%x'.\n",
|
||||
|
||||
optopt);
|
||||
return 1;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (mvalue == NULL || cvalue == NULL || pvalue == NULL ||
|
||||
fvalue == NULL) {
|
||||
printf("options -m, -c, -p and -f required.\n");
|
||||
printf("usage: %s -m maxlatency -c cycles_per_packet -p mpps(million-packet-per-sec) -f tsc_freq_mhz [-t timeout (in secs)] [-q min_queue_len_to_print_trace]\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
maxlatency = atoi(mvalue);
|
||||
cycles_per_packet = atoi(cvalue);
|
||||
mpps = atof(pvalue);
|
||||
tsc_freq_mhz = atof(fvalue);
|
||||
|
||||
if (tvalue) {
|
||||
int alarm_secs;
|
||||
alarm_secs = atoi(tvalue);
|
||||
alarm(alarm_secs);
|
||||
}
|
||||
|
||||
if (qvalue) {
|
||||
min_queue_size_to_print = atoi(qvalue);
|
||||
}
|
||||
|
||||
if (optind != argc) {
|
||||
for (index = optind; index < argc; index++) {
|
||||
printf ("Error, non-option argument %s\n", argv[index]);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
convert_to_ghz(tsc_freq_mhz);
|
||||
|
||||
max_queue_len_f = maxlatency / (cycles_per_packet*cycles_to_ns);
|
||||
max_queue_len = max_queue_len_f;
|
||||
|
||||
printf("max_queue_len = %d\n", max_queue_len);
|
||||
default_n = measure_n();
|
||||
|
||||
nr_packets_drain_per_block = calculate_nr_packets_drain_per_block();
|
||||
print_all_buckets_drainlength();
|
||||
|
||||
printf("default_n=%d nr_packets_drain_per_block=%d\n", default_n,
|
||||
nr_packets_drain_per_block);
|
||||
|
||||
main_loop();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
obj-m := targeted-ipi.o
|
||||
|
|
@ -0,0 +1 @@
|
|||
obj-$(CONFIG_TARGETED_IPI) += targeted-ipi.o
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
To build:
|
||||
|
||||
make -C <path_to_kernel_src> M=$PWD
|
||||
|
||||
To build against the running kernel use:
|
||||
|
||||
make -C /lib/modules/`uname -r`/build M=$PWD
|
||||
|
||||
|
||||
Read targeted-ipi.c for the parameters.
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/smp.h>
|
||||
#include <asm-generic/delay.h>
|
||||
|
||||
static int ipidest;
|
||||
module_param(ipidest, int, S_IRUGO);
|
||||
|
||||
/* number of ipis */
|
||||
static int nripis;
|
||||
module_param(nripis, int, S_IRUGO);
|
||||
|
||||
/* interval between consecutive IPI calls */
|
||||
static int interval;
|
||||
module_param(interval, int, S_IRUGO);
|
||||
|
||||
/* how many microseconds to delay in IPI handler */
|
||||
static int delay;
|
||||
module_param(delay, int, S_IRUGO);
|
||||
|
||||
static void ipi_handler(void *info)
|
||||
{
|
||||
udelay(interval);
|
||||
}
|
||||
|
||||
static int targeted_ipi_init(void)
|
||||
{
|
||||
int ret, i;
|
||||
|
||||
for (i=0; i < nripis; i++)
|
||||
{
|
||||
ret = smp_call_function_single(ipidest, ipi_handler, NULL, 1);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "i=%d smp_call_function_single ret=%d\n", i, ret);
|
||||
return 0;
|
||||
}
|
||||
udelay(interval);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Module exit: intentionally empty. */
static void targeted_ipi_exit(void)
{
	return;
}
|
||||
|
||||
module_init(targeted_ipi_init);
|
||||
module_exit(targeted_ipi_exit);
|
||||
MODULE_LICENSE("GPL");
|
Loading…
Reference in New Issue