Merge remote-tracking branch 'marcelowt2/queuelat' into unstable/devel/v1.2

Merging queuelat into rt-tests

Signed-off-by: John Kacur <jkacur@redhat.com>
This commit is contained in:
John Kacur 2018-04-30 15:27:49 +01:00
commit 62d2e8b40c
10 changed files with 991 additions and 1 deletions

View File

@ -16,7 +16,8 @@ sources = cyclictest.c \
sigwaittest.c \
svsematest.c \
cyclicdeadline.c \
deadline_test.c
deadline_test.c \
queuelat.c
TARGETS = $(sources:.c=)
LIBS = -lrt -lpthread
@ -86,6 +87,7 @@ VPATH += src/backfire:
VPATH += src/lib:
VPATH += src/hackbench:
VPATH += src/sched_deadline:
VPATH += src/queuelat:
$(OBJDIR)/%.o: %.c | $(OBJDIR)
$(CC) -D VERSION=$(VERSION) -c $< $(CFLAGS) $(CPPFLAGS) -o $@
@ -146,6 +148,9 @@ pip_stress: $(OBJDIR)/pip_stress.o $(OBJDIR)/librttest.a
hackbench: $(OBJDIR)/hackbench.o
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS)
queuelat: $(OBJDIR)/queuelat.o $(OBJDIR)/librttest.a
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) $(RTTESTLIB)
LIBOBJS =$(addprefix $(OBJDIR)/,error.o rt-get_cpu.o rt-sched.o rt-utils.o)
$(OBJDIR)/librttest.a: $(LIBOBJS)
$(AR) rcs $@ $^

9
src/queuelat/Makefile Normal file
View File

@ -0,0 +1,9 @@
# Standalone build of queuelat using the system C compiler.
# Link the final binary from its single object file.
queuelat: queuelat.o
cc -o queuelat queuelat.o
# Compile the only source file with -Wall.
queuelat.o: queuelat.c
cc -c -Wall queuelat.c
# Remove the build products.
clean:
rm queuelat queuelat.o

142
src/queuelat/README Normal file
View File

@ -0,0 +1,142 @@
cyclictest does not catch all cases where packet forwarding
latency can exceed a given threshold.
Example:
# taskset -c 3 ./queuelat -m 20000 -c 100 -p 13 -f `sh ./get_cpuinfo_mhz.sh`
# rmmod targeted-ipi; insmod ./targeted-ipi.ko ipidest=3 nripis=200 interval=2 delay=10
<...>-4566 [003] .....11 4474.559880: tracing_mark_write: memmove block queue_size=28 queue_dec=279 queue_inc=307 delta=23685 ns
<...>-4566 [003] .....11 4474.559912: tracing_mark_write: memmove block queue_size=63 queue_dec=279 queue_inc=314 delta=24198 ns
<...>-4566 [003] .....11 4474.559937: tracing_mark_write: memmove block queue_size=97 queue_dec=279 queue_inc=313 delta=24090 ns
<...>-4566 [003] .....11 4474.559965: tracing_mark_write: memmove block queue_size=130 queue_dec=279 queue_inc=312 delta=24048 ns
<...>-4566 [003] .....11 4474.559993: tracing_mark_write: memmove block queue_size=162 queue_dec=279 queue_inc=311 delta=23957 ns
<...>-4566 [003] .....11 4474.560018: tracing_mark_write: memmove block queue_size=193 queue_dec=279 queue_inc=310 delta=23912 ns
<...>-4566 [003] .....11 4474.560046: tracing_mark_write: memmove block queue_size=225 queue_dec=279 queue_inc=311 delta=23965 ns
<...>-4566 [003] .....11 4474.560074: tracing_mark_write: memmove block queue_size=257 queue_dec=279 queue_inc=311 delta=23971 ns
<...>-4566 [003] .....11 4474.560102: tracing_mark_write: memmove block queue_size=288 queue_dec=279 queue_inc=310 delta=23902 ns
<...>-4566 [003] .....11 4474.560127: tracing_mark_write: memmove block queue_size=320 queue_dec=279 queue_inc=311 delta=23945 ns
<...>-4566 [003] .....11 4474.560155: tracing_mark_write: memmove block queue_size=351 queue_dec=279 queue_inc=310 delta=23921 ns
<...>-4566 [003] .....11 4474.560180: tracing_mark_write: memmove block queue_size=381 queue_dec=279 queue_inc=309 delta=23839 ns
<...>-4566 [003] .....11 4474.560208: tracing_mark_write: memmove block queue_size=412 queue_dec=279 queue_inc=310 delta=23876 ns
<...>-4566 [003] .....11 4474.560236: tracing_mark_write: memmove block queue_size=443 queue_dec=279 queue_inc=310 delta=23886 ns
<...>-4566 [003] .....11 4474.560261: tracing_mark_write: memmove block queue_size=474 queue_dec=279 queue_inc=310 delta=23901 ns
<...>-4566 [003] .....11 4474.560288: tracing_mark_write: memmove block queue_size=505 queue_dec=279 queue_inc=310 delta=23891 ns
<...>-4566 [003] .....11 4474.560316: tracing_mark_write: memmove block queue_size=535 queue_dec=279 queue_inc=309 delta=23822 ns
<...>-4566 [003] .....11 4474.560341: tracing_mark_write: memmove block queue_size=565 queue_dec=279 queue_inc=309 delta=23815 ns
<...>-4566 [003] .....11 4474.560353: tracing_mark_write: queue length exceeded: queue_size=565 max_queue_len=559
# taskset -c 3 cyclictest -m -n -q -p95 -D 60m -h60 -i 200
# rmmod targeted-ipi; insmod ./targeted-ipi.ko ipidest=3 nripis=20000 interval=2 delay=10
Cyclictest results:
# Total: 000068099
# Min Latencies: 00001
# Avg Latencies: 00002
# Max Latencies: 00008
# Histogram Overflows: 00000
----- queuelat basics:
Queuelat simulates a DPDK queue. From queuelat.c:
Program parameters:
maxlatency: maximum latency allowed, in nanoseconds (int).
cycles_per_packet: number of cycles to process one packet (int).
mpps(million-packet-per-sec): million packets per second (float).
tsc_freq_mhz: TSC frequency in MHz, as measured by TSC PIT calibration
(search for "Detected XXX MHz processor" in dmesg, and use the integer part).
How it works
============
The program in essence does:
b = rdtsc();
memmove(dest, src, n);
a = rdtsc();
delay = convert_to_ns(a - b);
queue_size += packets_queued_in(delay);
queue_size -= packets_processed;
if (queue_size > max_queue_len)
FAIL();
packets_processed is fixed, and is estimated as follows:
n is determined first, so that the stats bucket with highest count
takes max_latency/2.
for max_latency/2, we calculate how many packets can be drained
in that time (using cycles_per_packet).
Queuelat output
===============
During calibration, queuelat outputs the following table:
[9600 - 9699] = 0 packetfillrates=[67 - 67]
[9700 - 9799] = 7907 packetfillrates=[67 - 68]
[9800 - 9899] = 42085 packetfillrates=[68 - 69]
[9900 - 9999] = 7 packetfillrates=[69 - 69]
[10000 - 10099] = 1 packetfillrates=[70 - 70]
| | | |
| | | |
| | | |_________ [min - max] number of
| | | packets the queue will reach
| | | with specified mpps in this
| | | time (without draining)
| | |
| | |______________________________ number of hits for this
| | bucket
| |
| |______________________________________ max amount of time (ns)
| this bucket accepts
|
|______________________________________________ min amount of time (ns)
this bucket accepts
On success, queuelat outputs a table similar to cyclictest:
[7000 - 7099] = 0
[7100 - 7199] = 2
[7200 - 7299] = 2457
[7300 - 7399] = 21058
| | |
| | |___________ Number of processing loops that hit this
| | bucket.
| |
| |____________________ Maximum number of nanoseconds of this bucket.
|
|
|___________________________ Minimum number of nanoseconds in this bucket.
That is, a processing loop is accounted to a bucket if its duration
satisfies
min_number_ns_in_bucket <= duration <= max_number_ns_in_bucket
Automatic determination of Mpps
===============================
There is a script called determine_maximum_mpps.sh, which should be edited
to include the pinning and -RT priority configuration for your machine.
PREAMBLE="taskset -c 2 chrt -f 1"
MAXLAT="20000"
CYCLES_PER_PACKET="300"
This script will find the maximum mpps parameter which can sustain:
1) 10 consecutive 30 second runs.
2) 1 run of 10 minutes.
Without violating the latency specified with $MAXLAT.

View File

@ -0,0 +1,128 @@
#!/bin/bash
# A script to determine the maximum mpps. Logic:
#   1. Coarse sweep upwards (3, 6, ... 48 Mpps) until a 30 second run fails.
#   2. Step down 1 Mpps at a time until a run succeeds.
#   3. Step up 0.3 Mpps at a time until a run fails.
#   4. Step down 0.1 Mpps at a time until a run succeeds.
#   5. Require 10 consecutive successful 30 second runs, backing off by
#      0.1 Mpps after every failure.
#   6. Require one successful 10 minute run, backing off by 0.1 Mpps after
#      every failure.
#
# NOTE: please set "PREAMBLE" to the command line you use for
# CPU pinning and real-time priority on your machine.
#
PREAMBLE="taskset -c 2 chrt -f 1"
MAXLAT="20000"
CYCLES_PER_PACKET="300"

# A single scratch file holds the output of the most recent queuelat run;
# it is removed at the end of the script.
outfile=`mktemp` || exit 1

# run_queuelat MPPS SECONDS
# Runs queuelat once. Succeeds unless the captured output contains
# "exceeded" (queuelat's "queue length exceeded" report).
run_queuelat() {
	$PREAMBLE ./queuelat -m $MAXLAT -c $CYCLES_PER_PACKET -f `sh ./get_cpuinfo_mhz.sh` -p $1 -t $2 > "$outfile"
	! grep -q exceeded "$outfile"
}

echo "Determining maximum mpps the machine can handle"
echo "Will take a few minutes to determine mpps value"
echo "And 10 minutes run to confirm the final mpps value is stable"

# Step 1: coarse upward sweep until the first failure.
for mpps in `seq 3 3 50`; do
	echo testing $mpps Mpps
	if ! run_queuelat $mpps 30; then
		echo mpps failed: $mpps
		break;
	fi
	echo success
done
echo first loop mpps: $mpps

# Step 2: walk down in whole Mpps until a run succeeds.
first_mpps=$(($mpps - 1))
for mpps in `seq $first_mpps -1 3`; do
	echo testing $mpps Mpps
	if run_queuelat $mpps 30; then
		echo mpps success $mpps
		break;
	fi
	echo failure
done

# Step 3: walk up in 0.3 Mpps steps until a run fails.
second_mpps=`echo "$mpps + 0.3" | bc`
echo second loop mpps: $mpps
for mpps in `seq $second_mpps 0.3 $first_mpps`; do
	echo testing $mpps Mpps
	if ! run_queuelat $mpps 30; then
		echo mpps failure $mpps
		break;
	fi
	echo success
done
echo third loop mpps: $mpps

# Step 4: walk down in 0.1 Mpps steps until a run succeeds.
third_mpps=`echo "$mpps -0.1" | bc`
for mpps in `seq $third_mpps -0.1 3`; do
	echo testing $mpps Mpps
	if run_queuelat $mpps 30; then
		echo mpps success $mpps
		break;
	fi
	echo failure
done

# Step 5: 10 consecutive 30 second runs must all pass; on any failure
# lower the rate by 0.1 Mpps and start the series again.
export queuelat_failure=1
while [ $queuelat_failure == 1 ]; do
	export queuelat_failure=0
	echo -n "Starting 10 runs of 30 seconds with "
	echo "$mpps Mpps"
	for i in `seq 1 10`; do
		if ! run_queuelat $mpps 30; then
			echo "mpps failure (run $i) $mpps"
			export queuelat_failure=1
			export mpps=`echo $mpps - 0.1 | bc`
			break
		fi
		echo "run $i success"
	done
done

# Step 6: one 10 minute run must pass; on failure lower the rate by
# 0.1 Mpps and retry.
export queuelat_failure=1
while [ $queuelat_failure == 1 ]; do
	export queuelat_failure=0
	echo -n "Starting 10 minutes run with "
	echo "$mpps Mpps"
	if ! run_queuelat $mpps 600; then
		echo "mpps failure (10 minutes run) $mpps"
		export queuelat_failure=1
		export mpps=`echo $mpps - 0.1 | bc`
		continue
	fi
	echo "10 minutes run success"
done
echo Final mpps is: $mpps
rm -f "$outfile"
unset queuelat_failure
unset mpps

View File

@ -0,0 +1,5 @@
#!/bin/bash
# Print the "cpu MHz" value of the first CPU listed in /proc/cpuinfo.
# Only the first matching line is used: when cores report different
# frequencies, printing one value per core would hand several words to the
# caller, which passes this output as the single argument of queuelat -f.
mhz=`grep -m 1 "cpu MHz" /proc/cpuinfo | cut -f 3 -d " "`
echo $mhz

635
src/queuelat/queuelat.c Normal file
View File

@ -0,0 +1,635 @@
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>
#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#define NSEC_PER_SEC 1000000000
/* Program parameters:
* maxlatency: maximum latency allowed, in nanoseconds (int).
* cycles_per_packet: number of cycles to process one packet (int).
* mpps(million-packet-per-sec): million packets per second (float).
* tsc_freq_mhz: TSC frequency in MHz, as measured by TSC PIT calibration
* (search for "Detected XXX MHz processor" in dmesg, and use the integer part).
*
* How it works
* ============
*
* The program in essence does:
*
* b = rdtsc();
* memmove(dest, src, n);
* a = rdtsc();
*
* delay = convert_to_ns(a - b);
*
* queue_size += packets_queued_in(delay);
* queue_size -= packets_processed;
*
* if (queue_size > max_queue_len)
* FAIL();
*
* packets_processed is fixed, and is estimated as follows:
* n is determined first, so that the stats bucket with highest count
* takes max_latency/2.
* for max_latency/2, we calculate how many packets can be drained
* in that time (using cycles_per_packet).
*
*/
/* Command-line parameters, assigned in main() */
/* -m: maximum allowed latency */
int maxlatency;
/* -c: cycles needed to process one packet */
int cycles_per_packet;
/* -p: packet arrival rate in million packets per second */
float mpps;
/* NOTE(review): never assigned in the code visible here; -t is handled
 * through a local variable in main(). */
int timeout_secs;
/* -q: trace output is only written while queue_size is above this value */
int min_queue_size_to_print;
/* Derived constants */
/* set by convert_to_ghz(): 1 / (TSC frequency in GHz) */
float cycles_to_ns;
/* set in main(): maxlatency / (cycles_per_packet * cycles_to_ns) */
int max_queue_len;
/* set in main() from measure_n(): memmove size used by main_loop() */
int default_n;
/* set by calculate_nr_packets_drain_per_block() */
int nr_packets_drain_per_block;
/*
 * Parameters for the stats collection buckets
 *
 * Bucket i counts samples in the range
 * [i*VALS_PER_BUCKET, (i+1)*VALS_PER_BUCKET - 1].  Every sample at or
 * above LAST_VAL is counted in the extra slot OUTLIER_BUCKET, which is
 * the last element of buckets[].
 */
#define LAST_VAL 70000
#define VALS_PER_BUCKET 100
/* Parenthesized so the macro expands safely inside larger expressions. */
#define NR_BUCKETS (LAST_VAL/VALS_PER_BUCKET)
unsigned long long int buckets[NR_BUCKETS+1];
unsigned long long int total_count;
#define OUTLIER_BUCKET NR_BUCKETS

/*
 * Map a sample value to the index of the bucket that counts it.
 *
 * The clamp to OUTLIER_BUCKET is done on the full-width quotient, before
 * narrowing to int.  Very large values (for example a negative int that
 * was converted to unsigned long long by the caller) therefore land in
 * the outlier bucket instead of producing a truncated, possibly negative
 * index.
 *
 * Returns an index in [0, OUTLIER_BUCKET].
 */
static int val_to_bucket(unsigned long long val)
{
	unsigned long long bucket_nr = val / VALS_PER_BUCKET;

	if (bucket_nr >= NR_BUCKETS)
		return OUTLIER_BUCKET;
	return (int)bucket_nr;
}

/*
 * Record one sample: bump the counter of its bucket and the total count.
 */
static void account(unsigned long long val)
{
	buckets[val_to_bucket(val)]++;
	total_count++;
}
/*
 * Sum the counters of all buckets, outlier bucket included.
 */
static unsigned long long total_samples(void)
{
	unsigned long long sum = 0;
	int slot = 0;

	while (slot <= OUTLIER_BUCKET)
		sum += buckets[slot++];
	return sum;
}
static void print_all_buckets(void)
{
int i, print_dotdotdot = 0;
for (i = 0; i <= OUTLIER_BUCKET; i++) {
int bucket_nr;
unsigned long long val = i*VALS_PER_BUCKET;
bucket_nr = val_to_bucket(val);
if (bucket_nr != OUTLIER_BUCKET) {
int n_bucketnr = bucket_nr+1;
if (buckets[bucket_nr] == buckets[n_bucketnr]) {
print_dotdotdot = 1;
continue;
}
if (print_dotdotdot) {
printf("...\n");
print_dotdotdot = 0;
}
printf("[%lld - %lld] = %lld\n", val,
val + VALS_PER_BUCKET-1,
buckets[bucket_nr]);
} else {
if (print_dotdotdot) {
printf("...\n");
print_dotdotdot = 0;
}
printf("[%lld - END] = %lld\n", val,
buckets[bucket_nr]);
}
}
}
static void print_max_bucketsec(void)
{
int i, bucket_nr;
unsigned long long highest_val = 0;
for (i = 0; i <= OUTLIER_BUCKET; i++) {
unsigned long long val = i*VALS_PER_BUCKET;
bucket_nr = val_to_bucket(val);
if (buckets[bucket_nr] != 0) {
highest_val = val;
}
}
bucket_nr = val_to_bucket(highest_val);
printf("Max loop processing time: [%lld - %lld] = %lld\n", highest_val,
highest_val + VALS_PER_BUCKET-1,
buckets[bucket_nr]);
return;
}
static void print_min_bucketsec(void)
{
int i, bucket_nr;
unsigned long long min_val = 0;
for (i = 0; i <= OUTLIER_BUCKET; i++) {
unsigned long long val = i*VALS_PER_BUCKET;
bucket_nr = val_to_bucket(val);
if (buckets[bucket_nr] != 0) {
min_val = val;
break;
}
}
bucket_nr = val_to_bucket(min_val);
printf("Min loop processing time: [%lld - %lld] = %lld\n", min_val,
min_val + VALS_PER_BUCKET-1,
buckets[bucket_nr]);
return;
}
/*
 * Print the average loop processing time.
 *
 * Each sample is weighted with the upper bound of its bucket, so the
 * printed value is an upper estimate of the true average.
 *
 * When no sample has been recorded yet (for example when a signal
 * arrives right after the buckets were reset) 0 is printed instead of
 * dividing by zero.
 */
static void print_avg_bucketsec(void)
{
	int i;
	unsigned long long total_sum = 0;
	unsigned long long nr_hits = 0;

	for (i = 0; i <= OUTLIER_BUCKET; i++) {
		unsigned long long val = i*VALS_PER_BUCKET;
		unsigned long long maxtime = val + VALS_PER_BUCKET-1;
		int bucket_nr = val_to_bucket(val);

		total_sum += maxtime*buckets[bucket_nr];
		nr_hits += buckets[bucket_nr];
	}

	if (nr_hits == 0) {
		printf("Avg loop processing time: 0\n");
		return;
	}
	printf("Avg loop processing time: %lld\n", total_sum / nr_hits);
}
static void print_all_buckets_drainlength(void)
{
int i, print_dotdotdot = 0;
for (i = 0; i <= OUTLIER_BUCKET; i++) {
int bucket_nr;
unsigned long long val = i*VALS_PER_BUCKET;
bucket_nr = val_to_bucket(val);
if (bucket_nr != OUTLIER_BUCKET) {
unsigned long long mindelta, maxdelta;
int nr_packets_minfill, nr_packets_maxfill;
int n_bucketnr = bucket_nr+1;
if (buckets[bucket_nr] == buckets[n_bucketnr]) {
print_dotdotdot = 1;
continue;
}
if (print_dotdotdot) {
printf("...\n");
print_dotdotdot = 0;
}
mindelta = val;
maxdelta = val + VALS_PER_BUCKET-1;
nr_packets_minfill = mindelta * mpps * 1000000 / NSEC_PER_SEC;
nr_packets_maxfill = maxdelta * mpps * 1000000 / NSEC_PER_SEC;
printf("[%lld - %lld] = %lld packetfillrates=[%d - %d]\n", val,
val + VALS_PER_BUCKET-1,
buckets[bucket_nr],
nr_packets_minfill,
nr_packets_maxfill);
} else {
if (print_dotdotdot) {
printf("...\n");
print_dotdotdot = 0;
}
printf("[%lld - END] = %lld\n", val,
buckets[bucket_nr]);
}
}
}
typedef unsigned long long cycles_t;
typedef unsigned long long usecs_t;
typedef unsigned long long u64;

/*
 * Helpers describing how the 64-bit result of rdtsc (returned in
 * EDX:EAX) is bound to C variables: two 32-bit halves on x86_64, one
 * 64-bit value via the "A" constraint otherwise.
 */
#ifdef __x86_64__
#define DECLARE_ARGS(val, low, high) unsigned low, high
#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32))
#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high)
#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
#else
#define DECLARE_ARGS(val, low, high) unsigned long long val
#define EAX_EDX_VAL(val, low, high) (val)
#define EAX_EDX_ARGS(val, low, high) "A" (val)
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif

/*
 * Read the time stamp counter, issuing cpuid first.
 *
 * cpuid overwrites EAX, EBX, ECX and EDX.  EAX/EDX are outputs of this
 * statement; EBX and ECX are listed as clobbers so the compiler does not
 * keep live values in them across the instruction.
 *
 * Returns the 64-bit counter value.
 */
static inline unsigned long long __rdtscll(void)
{
	DECLARE_ARGS(val, low, high);

	/* __asm__ spelling also compiles in strict ISO C modes */
	__asm__ __volatile__("cpuid; rdtsc"
			     : EAX_EDX_RET(val, low, high)
			     :
			     : "ebx", "ecx");
	return EAX_EDX_VAL(val, low, high);
}

/* Store the current time stamp counter value in val. */
#define rdtscll(val) do { (val) = __rdtscll(); } while (0)
/*
 * Reset the histogram: clear every bucket (outlier bucket included) and
 * the total sample count.
 */
static void init_buckets(void)
{
	memset(buckets, 0, sizeof(buckets));
	total_count = 0;
}
/*
 * Return the index of the bucket with the largest sample count.
 * On ties the lowest index wins; 0 is returned when all buckets are empty.
 */
static int find_highest_count_bucket(void)
{
	unsigned long long int best_count = 0;
	int best = 0;
	int slot = 0;

	while (slot <= NR_BUCKETS) {
		if (buckets[slot] > best_count) {
			best_count = buckets[slot];
			best = slot;
		}
		slot++;
	}
	return best;
}
/* File descriptor of the ftrace trace_marker file, set by trace_open(). */
int tracing_mark_fd;

/*
 * Open the ftrace trace_marker file and store the descriptor in
 * tracing_mark_fd.
 *
 * The debugfs location is tried first, then the tracefs mount point, so
 * the program also works where only /sys/kernel/tracing is available.
 * If neither can be opened the error of the last attempt is reported and
 * the program terminates with a failure status.
 */
static void trace_open(void)
{
	static const char *const marker_paths[] = {
		"/sys/kernel/debug/tracing/trace_marker",
		"/sys/kernel/tracing/trace_marker",
	};
	size_t i;
	int fd = -1;

	for (i = 0; i < sizeof(marker_paths)/sizeof(marker_paths[0]); i++) {
		fd = open(marker_paths[i], O_RDWR);
		if (fd != -1)
			break;
	}
	if (fd == -1) {
		perror("open");
		exit(EXIT_FAILURE);
	}
	tracing_mark_fd = fd;
}

/*
 * Write len bytes from buf to the trace marker.
 * Terminates the program with a failure status if the write fails.
 */
static void trace_write(char *buf, int len)
{
	int ret;

	ret = write(tracing_mark_fd, buf, len);
	if (ret == -1) {
		perror("write");
		exit(EXIT_FAILURE);
	}
}
/*
 * Time 50000 memmove() calls of n bytes each and record every duration
 * (converted from TSC cycles with cycles_to_ns) in the histogram, which
 * is reset first.
 *
 * NOTE(review): allocation failures are reported on stdout and the
 * program then exits with status 0.
 */
static void run_n(int n)
{
	const int loops = 50000;
	void *dest, *src;
	int remaining;

	init_buckets();

	dest = malloc(n);
	if (!dest) {
		printf("failure to allocate %d bytes  for dest\n", n);
		exit(0);
	}

	src = malloc(n);
	if (!src) {
		printf("failure to allocate %d bytes  for src\n", n);
		exit(0);
	}

	/* touch both buffers once before the timed loop starts */
	memset(src, 0, n);
	memmove(dest, src, n);

	for (remaining = loops; remaining > 0; remaining--) {
		u64 before, after;
		int elapsed_ns;

		rdtscll(before);
		memmove(dest, src, n);
		rdtscll(after);

		elapsed_ns = (after - before) * cycles_to_ns;
		account(elapsed_ns);
	}

	free(dest);
	free(src);
}
/*
 * Search for the memmove size n (in bytes) at which the most populated
 * histogram bucket of run_n(n) sits at maxlatency/2.
 *
 * Starting from 100000 bytes, n is moved in steps of 1000 towards the
 * target until the start of the most populated bucket is within
 * 2*VALS_PER_BUCKET of maxlatency/2.  The histogram of the last run is
 * left in buckets[].
 *
 * Returns the size found.
 */
static int measure_n(void)
{
	int n = 100000;
	int delta = 0;

	for (;;) {
		int peak_ns;

		if (delta > 0)
			n += 1000;
		else if (delta < 0)
			n -= 1000;

		run_n(n);

		peak_ns = find_highest_count_bucket() * VALS_PER_BUCKET;
		delta = maxlatency/2 - peak_ns;
		if (abs(delta) <= VALS_PER_BUCKET*2)
			break;
	}
	return n;
}
/*
 * Derive cycles_to_ns, the factor that converts TSC cycles to
 * nanoseconds, from the TSC frequency given in MHz, and print both the
 * frequency in GHz and the resulting factor.
 */
static void convert_to_ghz(double tsc_freq_mhz)
{
	float ghz = tsc_freq_mhz/1000;

	/* one cycle lasts 1/GHz nanoseconds */
	cycles_to_ns = 1/ghz;
	printf("tsc_freq_ghz = %f, cycles_to_ns = %f\n", ghz,
	       cycles_to_ns);
}
/*
 * Print the final report: the full histogram, a separator line, then
 * the minimum, maximum and average loop processing times.
 */
static void print_exit_info(void)
{
	print_all_buckets();
	fputs("\n ---------------- \n", stdout);
	print_min_bucketsec();
	print_max_bucketsec();
	print_avg_bucketsec();
}
/*
 * Measurement loop: repeatedly time a memmove() of default_n bytes and
 * simulate a packet queue that fills at the configured mpps rate during
 * each memmove and drains by nr_packets_drain_per_block per iteration.
 *
 * The loop has no exit condition of its own.  The function only ends by
 * calling exit(): when the simulated queue grows beyond max_queue_len,
 * or on an allocation failure.
 */
void main_loop(void)
{
u64 a, b;
void *dest, *src;
int delta;
int queue_size = 0;
trace_open();
init_buckets();
dest = malloc(default_n);
if (dest == NULL) {
printf("failure to allocate %d bytes "
" for dest\n", default_n);
/* NOTE(review): error path exits with status 0 */
exit(0);
}
src = malloc(default_n);
if (src == NULL) {
printf("failure to allocate %d bytes "
" for src\n", default_n);
/* NOTE(review): error path exits with status 0 */
exit(0);
}
/* touch both buffers once before the timed loop starts */
memset(src, 0, default_n);
memmove(dest, src, default_n);
while (1) {
char buf[500];
int ret;
int nr_packets_fill;
/* time one memmove in TSC cycles and convert to nanoseconds */
rdtscll(b);
memmove(dest, src, default_n);
rdtscll(a);
delta = (a - b) * cycles_to_ns;
account(delta);
/* fill up the queue by the amount of
* time that passed */
nr_packets_fill = delta * mpps * 1000000 / NSEC_PER_SEC;
queue_size += nr_packets_fill;
/* decrease the queue by the amount of packets
* processed in maxlatency/2 nanoseconds of
* full processing.
*/
queue_size -= nr_packets_drain_per_block;
/* the simulated queue cannot hold a negative number of packets */
if (queue_size < 0)
queue_size = 0;
/* Nothing is traced while the queue is at or below the print
* threshold.
* NOTE(review): this also skips the overflow check below, so with a
* threshold above max_queue_len an overflow is only detected once
* queue_size exceeds the threshold. */
if (queue_size <= min_queue_size_to_print)
continue;
ret = sprintf(buf, "memmove block queue_size=%d queue_dec=%d"
" queue_inc=%d delta=%d ns\n", queue_size,
nr_packets_drain_per_block,
nr_packets_fill, delta);
trace_write(buf, ret);
/* queue overflow: report on stdout and in the trace, print the
* statistics and terminate */
if (queue_size > max_queue_len) {
printf("queue length exceeded: "
" queue_size=%d max_queue_len=%d\n",
queue_size, max_queue_len);
ret = sprintf(buf, "queue length exceeded: "
"queue_size=%d max_queue_len=%d\n",
queue_size, max_queue_len);
trace_write(buf, ret);
print_exit_info();
exit(0);
}
}
/* not reached: the loop above contains no break */
free(dest);
free(src);
}
/*
 * Signal handler: print the collected statistics and terminate with
 * status 0.  The signal number is not used.
 *
 * NOTE(review): print_exit_info() uses printf(), and exit() is called
 * from here; neither is async-signal-safe.
 */
void sig_handler(int sig)
{
	(void)sig;

	print_exit_info();
	exit(0);
}
/*
 * Route SIGALRM and SIGINT to sig_handler().
 */
static void install_signals(void)
{
	static const int handled[] = { SIGALRM, SIGINT };
	unsigned int k;

	for (k = 0; k < sizeof(handled)/sizeof(handled[0]); k++)
		signal(handled[k], sig_handler);
}
/*
 * Estimate how many packets are drained per processing block and store
 * the result in nr_packets_drain_per_block.
 *
 * Starting right after the most populated histogram bucket, the first
 * bucket holding fewer than 1/40 (2.5%) of all samples is located.  The
 * upper bound of that bucket is taken as the duration of one block, and
 * the number of packets that can be processed in that time follows from
 * cycles_per_packet and cycles_to_ns.
 *
 * Terminates the program with a failure status when no such bucket
 * exists.
 *
 * Returns the computed number of packets.
 */
int calculate_nr_packets_drain_per_block(void)
{
	unsigned long long maxcount;
	int i, block_ns;
	int found = 0;
	int bucket_nr = find_highest_count_bucket();

	maxcount = total_samples() / 40;
	for (i = bucket_nr+1; i <= NR_BUCKETS; i++) {
		if (buckets[i] < maxcount) {
			found = 1;
			break;
		}
	}
	if (!found) {
		/* the threshold is total/40, i.e. 2.5% of the samples */
		printf("error, did not find right bucket with < 2.5%% of total\n");
		exit(EXIT_FAILURE);
	}

	block_ns = i*VALS_PER_BUCKET + VALS_PER_BUCKET-1;
	nr_packets_drain_per_block = block_ns / (cycles_per_packet*cycles_to_ns);
	return nr_packets_drain_per_block;
}
/*
 * Entry point.
 *
 * Parses the options
 *   -m maxlatency, -c cycles_per_packet, -p mpps, -f tsc_freq_mhz
 *   (all required), -t timeout in seconds and -q minimum queue length
 *   for trace output (both optional),
 * derives max_queue_len, calibrates the memmove size with measure_n(),
 * computes the per-block drain rate and then enters main_loop().
 *
 * Returns 1 on a usage error.  The numeric values of -m, -c, -p and -f
 * must be positive because they are used as divisors or loop targets
 * later on; a negative -t is rejected as well.
 */
int main(int argc, char **argv)
{
	double tsc_freq_mhz;
	float max_queue_len_f;
	char *mvalue = NULL;
	char *cvalue = NULL;
	char *pvalue = NULL;
	char *fvalue = NULL;
	char *tvalue = NULL;
	char *qvalue = NULL;
	int index;
	int c;

	install_signals();

	/* error messages for bad options are printed below, not by getopt */
	opterr = 0;
	while ((c = getopt (argc, argv, "m:c:p:f:t:q:")) != -1) {
		switch (c) {
		case 'm':
			mvalue = optarg;
			break;
		case 'c':
			cvalue = optarg;
			break;
		case 'p':
			pvalue = optarg;
			break;
		case 'f':
			fvalue = optarg;
			break;
		case 't':
			tvalue = optarg;
			break;
		case 'q':
			qvalue = optarg;
			break;
		case '?':
			if (optopt == 'm' || optopt == 'c' || optopt == 'p' ||
			    optopt == 'f' || optopt == 't' || optopt == 'q')
				printf ("Option -%c requires an argument.\n", optopt);
			else if (isprint (optopt))
				printf ("Unknown option `-%c'.\n", optopt);
			else
				printf ( "Unknown option character `\\x%x'.\n",
					 optopt);
			return 1;
		default:
			abort ();
		}
	}

	if (mvalue == NULL || cvalue == NULL || pvalue == NULL ||
	    fvalue == NULL) {
		printf("options -m, -c, -p and -f required.\n");
		printf("usage: %s -m maxlatency -c cycles_per_packet -p mpps(million-packet-per-sec) -f tsc_freq_mhz [-t timeout (in secs)] [-q min_queue_len_to_print_trace]\n", argv[0]);
		return 1;
	}

	maxlatency = atoi(mvalue);
	cycles_per_packet = atoi(cvalue);
	mpps = atof(pvalue);
	tsc_freq_mhz = atof(fvalue);

	/*
	 * Zero or negative values would lead to divisions by zero and
	 * out-of-range float-to-int conversions further down.  The
	 * comparisons are written so that NaN is rejected too.
	 */
	if (maxlatency <= 0 || cycles_per_packet <= 0 ||
	    !(mpps > 0) || !(tsc_freq_mhz > 0)) {
		printf("Error, -m, -c, -p and -f must all be positive numbers.\n");
		return 1;
	}

	if (tvalue) {
		int alarm_secs = atoi(tvalue);

		if (alarm_secs < 0) {
			printf("Error, timeout must not be negative.\n");
			return 1;
		}
		/* SIGALRM ends the run through sig_handler() */
		alarm(alarm_secs);
	}

	if (qvalue)
		min_queue_size_to_print = atoi(qvalue);

	if (optind != argc) {
		for (index = optind; index < argc; index++)
			printf ("Error, non-option argument %s\n", argv[index]);
		return 1;
	}

	convert_to_ghz(tsc_freq_mhz);

	/* number of packets that can be processed within maxlatency */
	max_queue_len_f = maxlatency / (cycles_per_packet*cycles_to_ns);
	max_queue_len = max_queue_len_f;
	printf("max_queue_len = %d\n", max_queue_len);

	default_n = measure_n();
	nr_packets_drain_per_block = calculate_nr_packets_drain_per_block();
	print_all_buckets_drainlength();
	printf("default_n=%d nr_packets_drain_per_block=%d\n", default_n,
	       nr_packets_drain_per_block);

	main_loop();
	return 0;
}

View File

@ -0,0 +1,2 @@
obj-m := targeted-ipi.o

View File

@ -0,0 +1 @@
obj-$(CONFIG_TARGETED_IPI) += targeted-ipi.o

View File

@ -0,0 +1,12 @@
To build:
make -C <path_to_kernel_src> M=$PWD
To build against the running kernel use:
make -C /lib/modules/`uname -r`/build M=$PWD
Read targeted-ipi.c for the parameters.

View File

@ -0,0 +1,51 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <asm-generic/delay.h>
/* CPU the IPIs are sent to (first argument of smp_call_function_single()) */
static int ipidest;
module_param(ipidest, int, S_IRUGO);
/* number of ipis */
static int nripis;
module_param(nripis, int, S_IRUGO);
/* interval between consecutive IPI calls (passed to udelay(), i.e. microseconds) */
static int interval;
module_param(interval, int, S_IRUGO);
/* how many microseconds to delay in IPI handler */
static int delay;
module_param(delay, int, S_IRUGO);
static void ipi_handler(void *info)
{
udelay(interval);
}
/*
 * Module init: send nripis IPIs to CPU ipidest, one after the other,
 * waiting for each handler to finish (last argument 1) and pausing
 * `interval` microseconds between calls.
 *
 * If a call fails the error is logged and the loop stops.  0 is
 * returned in every case.
 */
static int targeted_ipi_init(void)
{
	int sent = 0;

	while (sent < nripis) {
		int err = smp_call_function_single(ipidest, ipi_handler, NULL, 1);

		if (err) {
			printk(KERN_ERR "i=%d smp_call_function_single ret=%d\n", sent, err);
			return 0;
		}
		udelay(interval);
		sent++;
	}
	return 0;
}
/* Module exit: nothing to undo, all work is done in targeted_ipi_init(). */
static void targeted_ipi_exit(void)
{
}
/* Register the init and exit entry points and declare the license. */
module_init(targeted_ipi_init);
module_exit(targeted_ipi_exit);
MODULE_LICENSE("GPL");