diff --git a/Makefile b/Makefile
index fc8bf74..cc296ee 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,8 @@ sources = cyclictest.c \
 	  sigwaittest.c \
 	  svsematest.c \
 	  cyclicdeadline.c \
-	  deadline_test.c
+	  deadline_test.c \
+	  queuelat.c
 
 TARGETS = $(sources:.c=)
 LIBS = -lrt -lpthread
@@ -86,6 +87,7 @@ VPATH += src/backfire:
 VPATH += src/lib:
 VPATH += src/hackbench:
 VPATH += src/sched_deadline:
+VPATH += src/queuelat:
 
 $(OBJDIR)/%.o: %.c | $(OBJDIR)
 	$(CC) -D VERSION=$(VERSION) -c $< $(CFLAGS) $(CPPFLAGS) -o $@
@@ -146,6 +148,9 @@ pip_stress: $(OBJDIR)/pip_stress.o $(OBJDIR)/librttest.a
 hackbench: $(OBJDIR)/hackbench.o
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS)
 
+queuelat: $(OBJDIR)/queuelat.o $(OBJDIR)/librttest.a
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) $(RTTESTLIB)
+
 LIBOBJS =$(addprefix $(OBJDIR)/,error.o rt-get_cpu.o rt-sched.o rt-utils.o)
 $(OBJDIR)/librttest.a: $(LIBOBJS)
 	$(AR) rcs $@ $^
diff --git a/src/queuelat/Makefile b/src/queuelat/Makefile
new file mode 100644
index 0000000..b4d3278
--- /dev/null
+++ b/src/queuelat/Makefile
@@ -0,0 +1,17 @@
+# Standalone build of queuelat, independent of the top-level Makefile.
+# CC, CPPFLAGS, CFLAGS and LDFLAGS may be overridden on the command
+# line, e.g. "make CC=clang CFLAGS=-O2".
+
+.PHONY: clean
+
+queuelat: queuelat.o
+	$(CC) $(LDFLAGS) -o $@ $^
+
+# -Wall is kept outside CFLAGS so that a user-supplied CFLAGS cannot drop it.
+queuelat.o: queuelat.c
+	$(CC) -Wall $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
+
+# $(RM) is "rm -f", so clean succeeds even when nothing has been built.
+clean:
+	$(RM) queuelat queuelat.o
+
diff --git a/src/queuelat/README b/src/queuelat/README
new file mode 100644
index 0000000..790de3e
--- /dev/null
+++ b/src/queuelat/README
@@ -0,0 +1,142 @@
+
+cyclictest does not catch all cases where packet forwarding
+latency can exceed a given threshold.
+ +Example: + +# taskset -c 3 ./queuelat -m 20000 -c 100 -p 13 -f `sh ./get_cpuinfo_mhz.sh` + +# rmmod targeted-ipi; insmod ./targeted-ipi.ko ipidest=3 nripis=200 interval=2 delay=10 + + <...>-4566 [003] .....11 4474.559880: tracing_mark_write: memmove block queue_size=28 queue_dec=279 queue_inc=307 delta=23685 ns + <...>-4566 [003] .....11 4474.559912: tracing_mark_write: memmove block queue_size=63 queue_dec=279 queue_inc=314 delta=24198 ns + <...>-4566 [003] .....11 4474.559937: tracing_mark_write: memmove block queue_size=97 queue_dec=279 queue_inc=313 delta=24090 ns + <...>-4566 [003] .....11 4474.559965: tracing_mark_write: memmove block queue_size=130 queue_dec=279 queue_inc=312 delta=24048 ns + <...>-4566 [003] .....11 4474.559993: tracing_mark_write: memmove block queue_size=162 queue_dec=279 queue_inc=311 delta=23957 ns + <...>-4566 [003] .....11 4474.560018: tracing_mark_write: memmove block queue_size=193 queue_dec=279 queue_inc=310 delta=23912 ns + <...>-4566 [003] .....11 4474.560046: tracing_mark_write: memmove block queue_size=225 queue_dec=279 queue_inc=311 delta=23965 ns + <...>-4566 [003] .....11 4474.560074: tracing_mark_write: memmove block queue_size=257 queue_dec=279 queue_inc=311 delta=23971 ns + <...>-4566 [003] .....11 4474.560102: tracing_mark_write: memmove block queue_size=288 queue_dec=279 queue_inc=310 delta=23902 ns + <...>-4566 [003] .....11 4474.560127: tracing_mark_write: memmove block queue_size=320 queue_dec=279 queue_inc=311 delta=23945 ns + <...>-4566 [003] .....11 4474.560155: tracing_mark_write: memmove block queue_size=351 queue_dec=279 queue_inc=310 delta=23921 ns + <...>-4566 [003] .....11 4474.560180: tracing_mark_write: memmove block queue_size=381 queue_dec=279 queue_inc=309 delta=23839 ns + <...>-4566 [003] .....11 4474.560208: tracing_mark_write: memmove block queue_size=412 queue_dec=279 queue_inc=310 delta=23876 ns + <...>-4566 [003] .....11 4474.560236: tracing_mark_write: memmove block queue_size=443 
queue_dec=279 queue_inc=310 delta=23886 ns
+ <...>-4566 [003] .....11 4474.560261: tracing_mark_write: memmove block queue_size=474 queue_dec=279 queue_inc=310 delta=23901 ns
+ <...>-4566 [003] .....11 4474.560288: tracing_mark_write: memmove block queue_size=505 queue_dec=279 queue_inc=310 delta=23891 ns
+ <...>-4566 [003] .....11 4474.560316: tracing_mark_write: memmove block queue_size=535 queue_dec=279 queue_inc=309 delta=23822 ns
+ <...>-4566 [003] .....11 4474.560341: tracing_mark_write: memmove block queue_size=565 queue_dec=279 queue_inc=309 delta=23815 ns
+ <...>-4566 [003] .....11 4474.560353: tracing_mark_write: queue length exceeded: queue_size=565 max_queue_len=559
+
+# taskset -c 3 cyclictest -m -n -q -p95 -D 60m -h60 -i 200
+
+# rmmod targeted-ipi; insmod ./targeted-ipi.ko ipidest=3 nripis=20000 interval=2 delay=10
+
+Cyclictest results:
+
+# Total: 000068099
+# Min Latencies: 00001
+# Avg Latencies: 00002
+# Max Latencies: 00008
+# Histogram Overflows: 00000
+
+
+----- queuelat basics:
+
+Queuelat simulates a DPDK queue. From queuelat.c:
+
+Program parameters:
+maxlatency: maximum latency allowed, in nanoseconds (int).
+cycles_per_packet: number of cycles to process one packet (int).
+mpps(million-packet-per-sec): million packets per second (float).
+tsc_freq_mhz: TSC frequency in MHz, as measured by TSC PIT calibration
+(search for "Detected XXX MHz processor" in dmesg, and use the integer part).
+
+How it works
+============
+
+ The program in essence does:
+
+	b = rdtsc();
+	memmove(dest, src, n);
+	a = rdtsc();
+
+	delay = convert_to_ns(a - b);
+
+	queue_size += packets_queued_in(delay);
+	queue_size -= packets_processed;
+
+	if (queue_size > max_queue_len)
+		FAIL();
+
+packets_processed is fixed, and is estimated as follows:
+n is determined first, so that the stats bucket with highest count
+takes maxlatency/2.
+For maxlatency/2, we calculate how many packets can be drained
+in that time (using cycles_per_packet).
+
+Queuelat output
+===============
+
+During calibration, queuelat outputs the following table:
+
+[9600 - 9699] = 0 packetfillrates=[67 - 67]
+[9700 - 9799] = 7907 packetfillrates=[67 - 68]
+[9800 - 9899] = 42085 packetfillrates=[68 - 69]
+[9900 - 9999] = 7 packetfillrates=[69 - 69]
+[10000 - 10099] = 1 packetfillrates=[70 - 70]
+   |       |       |                    |
+   |       |       |                    |
+   |       |       |                    |_________ [min - max] number of
+   |       |       |                               packets the queue will reach
+   |       |       |                               with specified mpps in this
+   |       |       |                               time (without draining)
+   |       |       |
+   |       |       |______________________________ number of hits for this
+   |       |                                       bucket
+   |       |
+   |       |______________________________________ max amount of time (ns)
+   |                                               this bucket accepts
+   |
+   |______________________________________________ min amount of time (ns)
+                                                   this bucket accepts
+
+
+On success, queuelat outputs a table similar to cyclictest:
+
+[7000 - 7099] = 0
+[7100 - 7199] = 2
+[7200 - 7299] = 2457
+[7300 - 7399] = 21058
+  |      |        |
+  |      |        |___________ Number of processing loops that hit this
+  |      |                     bucket.
+  |      |
+  |      |____________________ Maximum number of nanoseconds of this bucket.
+  |
+  |
+  |___________________________ Minimum number of nanoseconds in this bucket.
+
+That is, a processing loop is accounted to a bucket if its duration
+satisfies
+
+ min_number_ns_in_bucket <= duration <= max_number_ns_in_bucket
+
+
+Automatic determination of Mpps
+===============================
+
+There is a script called determine_maximum_mpps.sh, which should be edited
+to include the pinning and -RT priority configuration for your machine.
+
+PREAMBLE="taskset -c 2 chrt -f 1"
+MAXLAT="20000"
+CYCLES_PER_PACKET="300"
+
+This script will find the maximum mpps parameter which can sustain:
+
+ 1) 10 consecutive 30 second runs.
+ 2) 1 run of 10 minutes.
+
+Without violating the latency specified with $MAXLAT.
+
diff --git a/src/queuelat/determine_maximum_mpps.sh b/src/queuelat/determine_maximum_mpps.sh
new file mode 100755
index 0000000..cf7a8ca
--- /dev/null
+++ b/src/queuelat/determine_maximum_mpps.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+
+# A script to determine the maximum mpps the machine can sustain without
+# queuelat reporting "queue length exceeded". Logic:
+#  1. raise mpps in steps of 3 until a 30 second run fails
+#  2. lower it in steps of 1 until a run succeeds
+#  3. raise it in steps of 0.3 until a run fails
+#  4. lower it in steps of 0.1 until a run succeeds
+#  5. confirm with 10 consecutive 30 second runs and then one 10 minute
+#     run, lowering mpps by 0.1 and retrying whenever a run fails
+#
+# NOTE: please set "PREAMBLE" to the command line you use for pinning
+# queuelat to a CPU and giving it its -RT priority on your machine.
+#
+PREAMBLE="taskset -c 2 chrt -f 1"
+MAXLAT="20000"
+CYCLES_PER_PACKET="300"
+
+# A single scratch file is reused by every run and removed on exit so
+# that no temporary files are left behind.
+outfile=`mktemp` || exit 1
+trap 'rm -f "$outfile"' EXIT
+
+# run_queuelat MPPS SECONDS
+# Run queuelat at MPPS for SECONDS. Returns 0 if its output does not
+# contain "exceeded" (the rate was sustained), non-zero otherwise.
+run_queuelat()
+{
+	$PREAMBLE ./queuelat -m $MAXLAT -c $CYCLES_PER_PACKET -f `sh ./get_cpuinfo_mhz.sh` -p $1 -t $2 > "$outfile"
+	! grep -q exceeded "$outfile"
+}
+
+echo "Determining maximum mpps the machine can handle"
+echo "Will take a few minutes to determine mpps value"
+echo "And 10 minutes run to confirm the final mpps value is stable"
+
+for mpps in `seq 3 3 50`; do
+	echo testing $mpps Mpps
+
+	if ! run_queuelat $mpps 30; then
+		echo mpps failed: $mpps
+		break
+	fi
+	echo success
+done
+echo first loop mpps: $mpps
+
+first_mpps=$(($mpps - 1))
+for mpps in `seq $first_mpps -1 3`; do
+	echo testing $mpps Mpps
+
+	if run_queuelat $mpps 30; then
+		echo mpps success $mpps
+		break
+	fi
+	echo failure
+done
+
+second_mpps=`echo "$mpps + 0.3" | bc`
+echo second loop mpps: $mpps
+
+for mpps in `seq $second_mpps 0.3 $first_mpps`; do
+	echo testing $mpps Mpps
+
+	if ! run_queuelat $mpps 30; then
+		echo mpps failure $mpps
+		break
+	fi
+	echo success
+done
+
+echo third loop mpps: $mpps
+third_mpps=`echo "$mpps -0.1" | bc`
+
+for mpps in `seq $third_mpps -0.1 3`; do
+	echo testing $mpps Mpps
+
+	if run_queuelat $mpps 30; then
+		echo mpps success $mpps
+		break
+	fi
+	echo failure
+done
+
+queuelat_failure=1
+while [ "$queuelat_failure" -eq 1 ]; do
+
+	queuelat_failure=0
+
+	echo -n "Starting 10 runs of 30 seconds with "
+	echo "$mpps Mpps"
+
+	for i in `seq 1 10`; do
+		if ! run_queuelat $mpps 30; then
+			echo "mpps failure (run $i) $mpps"
+			queuelat_failure=1
+			mpps=`echo $mpps - 0.1 | bc`
+			break
+		fi
+		echo "run $i success"
+	done
+
+done
+
+queuelat_failure=1
+while [ "$queuelat_failure" -eq 1 ]; do
+
+	queuelat_failure=0
+
+	echo -n "Starting 10 minutes run with "
+	echo "$mpps Mpps"
+
+	# $i belongs to the loop above and is stale here, so the messages
+	# name the 10 minute run rather than a run number.
+	if ! run_queuelat $mpps 600; then
+		echo "mpps failure (10 minute run) $mpps"
+		queuelat_failure=1
+		mpps=`echo $mpps - 0.1 | bc`
+		continue
+	fi
+	echo "10 minute run success"
+done
+
+echo Final mpps is: $mpps
+
+unset queuelat_failure
+unset mpps
+
diff --git a/src/queuelat/get_cpuinfo_mhz.sh b/src/queuelat/get_cpuinfo_mhz.sh
new file mode 100755
index 0000000..fb5158f
--- /dev/null
+++ b/src/queuelat/get_cpuinfo_mhz.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Print the "cpu MHz" value of the first CPU listed in /proc/cpuinfo.
+# Only one value is printed because callers pass the output as the
+# single argument of "queuelat -f".
+mhz=`grep -m 1 "cpu MHz" /proc/cpuinfo | cut -f 3 -d " "`
+echo $mhz
+
diff --git a/src/queuelat/queuelat.c b/src/queuelat/queuelat.c
new file mode 100644
index 0000000..8bfed27
--- /dev/null
+++ b/src/queuelat/queuelat.c
@@ -0,0 +1,635 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#define NSEC_PER_SEC 1000000000
+
+/* Program parameters:
+ * maxlatency: maximum latency allowed, in nanoseconds (int).
+ * cycles_per_packet: number of cycles to process one packet (int).
+ * mpps(million-packet-per-sec): million packets per second (float).
+ * tsc_freq_mhz: TSC frequency in MHz, as measured by TSC PIT calibration
+ * (search for "Detected XXX MHz processor" in dmesg, and use the integer part).
+ *
+ * How it works
+ * ============
+ *
+ * The program in essence does:
+ *
+ *	b = rdtsc();
+ *	memmove(dest, src, n);
+ *	a = rdtsc();
+ *
+ *	delay = convert_to_ns(a - b);
+ *
+ *	queue_size += packets_queued_in(delay);
+ *	queue_size -= packets_processed;
+ *
+ *	if (queue_size > max_queue_len)
+ *		FAIL();
+ *
+ * packets_processed is fixed, and is estimated as follows:
+ * n is determined first, so that the stats bucket with highest count
+ * takes maxlatency/2.
+ * For maxlatency/2, we calculate how many packets can be drained
+ * in that time (using cycles_per_packet).
+ *
+ */
+
+int maxlatency;
+int cycles_per_packet;
+float mpps;
+int timeout_secs;
+int min_queue_size_to_print;
+
+/* Derived constants */
+
+float cycles_to_ns;
+int max_queue_len;
+
+int default_n;
+int nr_packets_drain_per_block;
+
+/*
+ * Parameters for the stats collection buckets
+ */
+
+#define LAST_VAL 70000
+#define VALS_PER_BUCKET 100
+#define NR_BUCKETS (LAST_VAL/VALS_PER_BUCKET)
+
+unsigned long long int buckets[NR_BUCKETS+1];
+unsigned long long int total_count;
+
+#define OUTLIER_BUCKET NR_BUCKETS
+/* Map a sample (ns) to its bucket; values past LAST_VAL go to OUTLIER_BUCKET. */
+static int val_to_bucket(unsigned long long val)
+{
+	unsigned long long bucket_nr = val / VALS_PER_BUCKET; /* range-check before narrowing to int */
+	if (bucket_nr >= NR_BUCKETS) {
+		return OUTLIER_BUCKET;
+	}
+	return bucket_nr;
+}
+
+static void account(unsigned long long val)
+{
+	int bucket_nr = val_to_bucket(val);
+	buckets[bucket_nr]++;
+	total_count++;
+}
+
+static unsigned long long total_samples(void)
+{
+	int i;
+	unsigned long long total = 0;
+
+	for (i = 0; i <= OUTLIER_BUCKET; i++) {
+		total += buckets[i];
+	}
+
+	return total;
+}
+/* Print the histogram; a bucket whose count equals the next one's is shown as "...". */
+static void print_all_buckets(void)
+{
+	int i, print_dotdotdot = 0;
+
+	for (i = 0; i <= OUTLIER_BUCKET; i++) {
+		int bucket_nr;
+		unsigned long long val = i*VALS_PER_BUCKET;
+
+		bucket_nr = val_to_bucket(val);
+
+		if (bucket_nr != OUTLIER_BUCKET) {
+			int n_bucketnr = bucket_nr+1;
+			if (buckets[bucket_nr] == buckets[n_bucketnr]) {
+				print_dotdotdot = 1;
+				continue;
+			}
+			if (print_dotdotdot) {
+				printf("...\n");
+				print_dotdotdot = 0;
+			}
+			printf("[%llu - %llu] = %llu\n", val,
+				val + VALS_PER_BUCKET-1,
+				buckets[bucket_nr]);
+		} else {
+			if (print_dotdotdot) {
+				printf("...\n");
+				print_dotdotdot = 0;
+			}
+			printf("[%llu - END] = %llu\n", val,
+				buckets[bucket_nr]);
+		}
+	}
+}
+/* Print the highest bucket that holds at least one sample. */
+static void print_max_bucketsec(void)
+{
+	int i, bucket_nr;
+	unsigned long long highest_val = 0;
+
+	for (i = 0; i <= OUTLIER_BUCKET; i++) {
+		unsigned long long val = i*VALS_PER_BUCKET;
+
+		bucket_nr = val_to_bucket(val);
+
+		if (buckets[bucket_nr] != 0) {
+			highest_val = val;
+		}
+	}
+
+	bucket_nr = val_to_bucket(highest_val);
+	printf("Max loop processing time: [%llu - %llu] = %llu\n", highest_val,
+		highest_val + VALS_PER_BUCKET-1,
+		buckets[bucket_nr]);
+
+	return;
+}
+/* Print the lowest bucket that holds at least one sample. */
+static void print_min_bucketsec(void)
+{
+	int i, bucket_nr;
+	unsigned long long min_val = 0;
+
+	for (i = 0; i <= OUTLIER_BUCKET; i++) {
+		unsigned long long val = i*VALS_PER_BUCKET;
+
+		bucket_nr = val_to_bucket(val);
+
+		if (buckets[bucket_nr] != 0) {
+			min_val = val;
+			break;
+		}
+	}
+
+	bucket_nr = val_to_bucket(min_val);
+	printf("Min loop processing time: [%llu - %llu] = %llu\n", min_val,
+		min_val + VALS_PER_BUCKET-1,
+		buckets[bucket_nr]);
+
+	return;
+}
+/* Print the average, weighting each bucket by its upper bound; 0 if no samples. */
+static void print_avg_bucketsec(void)
+{
+	int i, bucket_nr;
+	unsigned long long total_sum = 0;
+	unsigned long long nr_hits = 0;
+
+	for (i = 0; i <= OUTLIER_BUCKET; i++) {
+		unsigned long long val = i*VALS_PER_BUCKET;
+		unsigned long long maxtime;
+
+		bucket_nr = val_to_bucket(val);
+
+		maxtime = val + VALS_PER_BUCKET-1;
+		total_sum = total_sum + maxtime*buckets[bucket_nr];
+
+		nr_hits = nr_hits + buckets[bucket_nr];
+	}
+
+	printf("Avg loop processing time: %llu\n", nr_hits ? total_sum / nr_hits : 0);
+}
+/* Like print_all_buckets(), plus how many packets arrive at "mpps" in each bucket's time. */
+static void print_all_buckets_drainlength(void)
+{
+	int i, print_dotdotdot = 0;
+
+	for (i = 0; i <= OUTLIER_BUCKET; i++) {
+		int bucket_nr;
+		unsigned long long val = i*VALS_PER_BUCKET;
+
+		bucket_nr = val_to_bucket(val);
+
+		if (bucket_nr != OUTLIER_BUCKET) {
+			unsigned long long mindelta, maxdelta;
+			int nr_packets_minfill, nr_packets_maxfill;
+			int n_bucketnr = bucket_nr+1;
+
+			if (buckets[bucket_nr] == buckets[n_bucketnr]) {
+				print_dotdotdot = 1;
+				continue;
+			}
+			if (print_dotdotdot) {
+				printf("...\n");
+				print_dotdotdot = 0;
+			}
+
+			mindelta = val;
+			maxdelta = val + VALS_PER_BUCKET-1;
+
+			nr_packets_minfill = mindelta * mpps * 1000000 / NSEC_PER_SEC;
+			nr_packets_maxfill = maxdelta * mpps * 1000000 / NSEC_PER_SEC;
+
+			printf("[%llu - %llu] = %llu packetfillrates=[%d - %d]\n", val,
+				val + VALS_PER_BUCKET-1,
+				buckets[bucket_nr],
+				nr_packets_minfill,
+				nr_packets_maxfill);
+		} else {
+			if (print_dotdotdot) {
+				printf("...\n");
+				print_dotdotdot = 0;
+			}
+			printf("[%llu - END] = %llu\n", val,
+				buckets[bucket_nr]);
+		}
+	}
+}
+
+typedef unsigned long long cycles_t;
+typedef unsigned long long usecs_t;
+typedef unsigned long long u64;
+
+#ifdef __x86_64__
+#define DECLARE_ARGS(val, low, high)	unsigned low, high
+#define EAX_EDX_VAL(val, low, high)	((low) | ((u64)(high) << 32))
+#define EAX_EDX_ARGS(val, low, high)	"a" (low), "d" (high)
+#define EAX_EDX_RET(val, low, high)	"=a" (low), "=d" (high)
+#else
+#define DECLARE_ARGS(val, low, high)	unsigned long long val
+#define EAX_EDX_VAL(val, low, high)	(val)
+#define EAX_EDX_ARGS(val, low, high)	"A" (val)
+#define EAX_EDX_RET(val, low, high)	"=A" (val)
+#endif
+/* Read the TSC after cpuid; cpuid gets leaf 0 in eax and also writes ebx/ecx. */
+static inline unsigned long long __rdtscll(void)
+{
+	DECLARE_ARGS(val, low, high);
+
+	asm volatile("cpuid; rdtsc" : EAX_EDX_RET(val, low, high) : "a" (0) : "ebx", "ecx");
+
+	return EAX_EDX_VAL(val, low, high);
+}
+
+#define rdtscll(val) do { (val) = __rdtscll(); } while (0)
+
+static void init_buckets(void)
+{
+	int i;
+
+	for (i=0; i <= NR_BUCKETS; i++)
+		buckets[i] = 0;
+
+	total_count = 0;
+}
+/* Return the index of the bucket with the most samples (0 if all are empty). */
+static int find_highest_count_bucket(void)
+{
+	int i;
+	int max_bucket = 0;
+	unsigned long long int max_val = 0;
+
+	for (i=0; i <= NR_BUCKETS; i++) {
+		if (buckets[i] > max_val) {
+			max_bucket = i;
+			max_val = buckets[i];
+		}
+	}
+
+	return max_bucket;
+}
+
+int tracing_mark_fd;
+static void trace_open(void)
+{
+	int fd;
+
+	fd = open("/sys/kernel/debug/tracing/trace_marker", O_RDWR);
+
+	if (fd == -1) {
+		perror("open");
+		exit(1);
+	}
+	tracing_mark_fd = fd;
+}
+/* Write one message to the trace marker opened by trace_open(); exits on error. */
+static void trace_write(char *buf, int len)
+{
+	int ret;
+
+	ret = write(tracing_mark_fd, buf, len);
+	if (ret == -1) {
+		perror("write");
+		exit(1);
+	}
+}
+/* Reset the buckets, then time 50000 memmove() calls of n bytes into them. */
+static void run_n(int n)
+{
+	u64 a, b;
+	void *dest, *src;
+	int i, delta, loops = 50000;
+
+	init_buckets();
+
+	dest = malloc(n);
+	if (dest == NULL) {
+		printf("failure to allocate %d bytes "
+			" for dest\n", n);
+		exit(1);
+	}
+	src = malloc(n);
+	if (src == NULL) {
+		printf("failure to allocate %d bytes "
+			" for src\n", n);
+		exit(1);
+	}
+
+	memset(src, 0, n);
+
+	memmove(dest, src, n);
+	for (i = 0; i < loops; i++) {
+		rdtscll(b);
+		memmove(dest, src, n);
+		rdtscll(a);
+		delta = (a - b) * cycles_to_ns;
+		account(delta);
+	}
+
+	free(dest);
+	free(src);
+
+	return;
+}
+
+/*
+ * Find the size n such that the most populated
+ * stats bucket for the call
+ *
+ * memmove(dest, src, n)
+ *
+ * is within 2 buckets of maxlatency/2. n starts at
+ * 100000 and is adjusted in steps of 1000.
+ *
+ */
+static int measure_n(void)
+{
+	int time, bucket_nr;
+	int n = 100000, delta = 0;
+
+	do {
+		if (delta > 0)
+			n = n+1000;
+		else if (delta < 0)
+			n = n-1000;
+
+		run_n(n);
+		bucket_nr = find_highest_count_bucket();
+
+		time = bucket_nr * VALS_PER_BUCKET;
+
+		delta = maxlatency/2 - time;
+	} while (abs(delta) > VALS_PER_BUCKET*2);
+
+	return n;
+}
+/* Derive cycles_to_ns (ns per TSC cycle) from the TSC frequency in MHz. */
+static void convert_to_ghz(double tsc_freq_mhz)
+{
+	float tsc_freq_ghz = tsc_freq_mhz/1000;
+
+	cycles_to_ns = 1/tsc_freq_ghz;
+
+	printf("tsc_freq_ghz = %f, cycles_to_ns = %f\n", tsc_freq_ghz,
+		cycles_to_ns);
+}
+
+/* Print the histogram followed by the min/max/avg summary. */
+static void print_exit_info(void)
+{
+	print_all_buckets();
+	printf("\n ---------------- \n");
+	print_min_bucketsec();
+	print_max_bucketsec();
+	print_avg_bucketsec();
+
+}
+/* Simulate the queue: runs until the queue limit is exceeded or a signal arrives. */
+void main_loop(void)
+{
+	u64 a, b;
+	void *dest, *src;
+	int delta;
+	int queue_size = 0;
+
+	trace_open();
+
+	init_buckets();
+
+	dest = malloc(default_n);
+	if (dest == NULL) {
+		printf("failure to allocate %d bytes "
+			" for dest\n", default_n);
+		exit(1);
+	}
+	src = malloc(default_n);
+	if (src == NULL) {
+		printf("failure to allocate %d bytes "
+			" for src\n", default_n);
+		exit(1);
+	}
+
+	memset(src, 0, default_n);
+	memmove(dest, src, default_n);
+
+	while (1) {
+		char buf[500];
+		int ret;
+		int nr_packets_fill;
+
+		rdtscll(b);
+		memmove(dest, src, default_n);
+		rdtscll(a);
+		delta = (a - b) * cycles_to_ns;
+		account(delta);
+
+		/* fill up the queue by the amount of
+		 * time that passed */
+		nr_packets_fill = delta * mpps * 1000000 / NSEC_PER_SEC;
+		queue_size += nr_packets_fill;
+
+		/* decrease the queue by the amount of packets
+		 * processed in maxlatency/2 nanoseconds of
+		 * full processing.
+		 */
+
+		queue_size -= nr_packets_drain_per_block;
+
+		if (queue_size < 0)
+			queue_size = 0;
+
+		/* the -q print threshold must never hide an exceeded queue */
+		if (queue_size <= min_queue_size_to_print && queue_size <= max_queue_len)
+			continue;
+		ret = sprintf(buf, "memmove block queue_size=%d queue_dec=%d"
+				" queue_inc=%d delta=%d ns\n", queue_size,
+				nr_packets_drain_per_block,
+				nr_packets_fill, delta);
+		trace_write(buf, ret);
+
+		if (queue_size > max_queue_len) {
+			printf("queue length exceeded: "
+				" queue_size=%d max_queue_len=%d\n",
+				queue_size, max_queue_len);
+			ret = sprintf(buf, "queue length exceeded: "
+				"queue_size=%d max_queue_len=%d\n",
+				queue_size, max_queue_len);
+			trace_write(buf, ret);
+			print_exit_info();
+			exit(0);
+		}
+	}
+
+	free(dest);
+	free(src);
+}
+/* NOTE(review): printf() and exit() are not async-signal-safe - confirm this is acceptable. */
+void sig_handler(int sig)
+{
+	print_exit_info();
+	exit(0);
+}
+
+static void install_signals(void)
+{
+	signal(SIGALRM, sig_handler);
+	signal(SIGINT, sig_handler);
+}
+/* Packets drained per block, from the first bucket past the peak holding < 1/40 of samples. */
+int calculate_nr_packets_drain_per_block(void)
+{
+	int maxcount;
+	int i, time;
+	int found = 0;
+	int bucket_nr = find_highest_count_bucket();
+
+	maxcount = total_samples() / 40;
+
+	for (i = bucket_nr+1; i <= NR_BUCKETS; i++) {
+		if (buckets[i] < maxcount) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (found == 0) {
+		printf("error, did not find right bucket with < 2.5%% of total\n");
+		exit(1);
+	}
+
+	time = i*VALS_PER_BUCKET + VALS_PER_BUCKET-1;
+	nr_packets_drain_per_block = time / (cycles_per_packet*cycles_to_ns);
+
+	return nr_packets_drain_per_block;
+}
+/* Parse the options, calibrate (measure_n and the drain rate), then run main_loop(). */
+int main(int argc, char **argv)
+{
+	double tsc_freq_mhz;
+	float max_queue_len_f;
+	char *mvalue = NULL;
+	char *cvalue = NULL;
+	char *pvalue = NULL;
+	char *fvalue = NULL;
+	char *tvalue = NULL;
+	char *qvalue = NULL;
+	int index;
+	int c;
+
+	install_signals();
+
+	opterr = 0;
+
+	while ((c = getopt (argc, argv, "m:c:p:f:t:q:")) != -1)
+		switch (c)
+		{
+		case 'm':
+			mvalue = optarg;
+			break;
+		case 'c':
+			cvalue = optarg;
+			break;
+		case 'p':
+			pvalue = optarg;
+			break;
+		case 'f':
+			fvalue = optarg;
+			break;
+		case 't':
+			tvalue = optarg;
+			break;
+		case 'q':
+			qvalue = optarg;
+			break;
+		case '?':
+			if (optopt == 'm' || optopt == 'c' || optopt == 'p' ||
+			    optopt == 'f' || optopt == 't' || optopt == 'q')
+				printf ("Option -%c requires an argument.\n", optopt);
+			else if (isprint (optopt))
+				printf ("Unknown option `-%c'.\n", optopt);
+			else
+				printf ("Unknown option character "
+					"`\\x%x'.\n",
+					optopt);
+			return 1;
+		default:
+			abort ();
+		}
+
+	if (mvalue == NULL || cvalue == NULL || pvalue == NULL ||
+	    fvalue == NULL) {
+		printf("options -m, -c, -p and -f required.\n");
+		printf("usage: %s -m maxlatency -c cycles_per_packet -p mpps(million-packet-per-sec) -f tsc_freq_mhz [-t timeout (in secs)] [-q min_queue_len_to_print_trace]\n", argv[0]);
+		return 1;
+	}
+
+	maxlatency = atoi(mvalue);
+	cycles_per_packet = atoi(cvalue);
+	mpps = atof(pvalue);
+	tsc_freq_mhz = atof(fvalue);
+
+	if (tvalue) {
+		int alarm_secs;
+		alarm_secs = atoi(tvalue);
+		alarm(alarm_secs);
+	}
+
+	if (qvalue) {
+		min_queue_size_to_print = atoi(qvalue);
+	}
+
+	if (optind != argc) {
+		for (index = optind; index < argc; index++) {
+			printf ("Error, non-option argument %s\n", argv[index]);
+		}
+		return 1;
+	}
+
+	convert_to_ghz(tsc_freq_mhz);
+
+	max_queue_len_f = maxlatency / (cycles_per_packet*cycles_to_ns);
+	max_queue_len = max_queue_len_f;
+
+	printf("max_queue_len = %d\n", max_queue_len);
+	default_n = measure_n();
+
+	nr_packets_drain_per_block = calculate_nr_packets_drain_per_block();
+	print_all_buckets_drainlength();
+
+	printf("default_n=%d nr_packets_drain_per_block=%d\n", default_n,
+		nr_packets_drain_per_block);
+
+	main_loop();
+
+	return 0;
+}
+
diff --git a/src/queuelat/targeted-ipi/Kbuild b/src/queuelat/targeted-ipi/Kbuild
new file mode
100644
index 0000000..9bdd5c6
--- /dev/null
+++ b/src/queuelat/targeted-ipi/Kbuild
@@ -0,0 +1,2 @@
+obj-m := targeted-ipi.o
+
diff --git a/src/queuelat/targeted-ipi/Makefile b/src/queuelat/targeted-ipi/Makefile
new file mode 100644
index 0000000..ee5591f
--- /dev/null
+++ b/src/queuelat/targeted-ipi/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_TARGETED_IPI) += targeted-ipi.o
diff --git a/src/queuelat/targeted-ipi/README b/src/queuelat/targeted-ipi/README
new file mode 100644
index 0000000..f36711c
--- /dev/null
+++ b/src/queuelat/targeted-ipi/README
@@ -0,0 +1,12 @@
+
+To build:
+
+make -C <kernel-build-dir> M=$PWD
+
+To build against the running kernel use:
+
+make -C /lib/modules/`uname -r`/build M=$PWD
+
+
+Read targeted-ipi.c for the parameters.
+
diff --git a/src/queuelat/targeted-ipi/targeted-ipi.c b/src/queuelat/targeted-ipi/targeted-ipi.c
new file mode 100644
index 0000000..4a10e0b
--- /dev/null
+++ b/src/queuelat/targeted-ipi/targeted-ipi.c
@@ -0,0 +1,63 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+
+/*
+ * Test module: at load time, send "nripis" synchronous IPIs to CPU
+ * "ipidest". Each IPI handler busy-waits for "delay" microseconds and
+ * the sender pauses "interval" microseconds between IPIs.
+ */
+
+/* CPU the IPIs are sent to */
+static int ipidest;
+module_param(ipidest, int, S_IRUGO);
+
+/* number of ipis */
+static int nripis;
+module_param(nripis, int, S_IRUGO);
+
+/* interval, in microseconds, between consecutive IPI calls */
+static int interval;
+module_param(interval, int, S_IRUGO);
+
+/* how many microseconds to delay in IPI handler */
+static int delay;
+module_param(delay, int, S_IRUGO);
+
+/* Runs on the destination CPU: busy-wait for "delay" microseconds. */
+static void ipi_handler(void *info)
+{
+	udelay(delay);
+}
+
+/*
+ * Send the IPIs one at a time, waiting for each handler to finish
+ * (last argument 1) before pausing for "interval" microseconds.
+ * Returns the error from smp_call_function_single() on failure.
+ */
+static int targeted_ipi_init(void)
+{
+	int ret, i;
+
+	for (i = 0; i < nripis; i++) {
+		ret = smp_call_function_single(ipidest, ipi_handler, NULL, 1);
+		if (ret) {
+			pr_err("i=%d smp_call_function_single ret=%d\n", i, ret);
+			return ret;
+		}
+		udelay(interval);
+	}
+
+	return 0;
+}
+
+static void targeted_ipi_exit(void)
+{
+}
+
+module_init(targeted_ipi_init);
+module_exit(targeted_ipi_exit);
+MODULE_LICENSE("GPL");