diff options
| author | Christine Dodrill <me@christine.website> | 2019-06-14 18:09:39 +0000 |
|---|---|---|
| committer | Christine Dodrill <me@christine.website> | 2019-06-14 18:09:39 +0000 |
| commit | b75b53a6ac5653a79fc1c7bf95b9fb8a85ce71de (patch) | |
| tree | da7b05d874bf730c8a96dd9109e6cf5e13ea350e | |
| parent | 0b87828dc7b052120e93f680c162068e4ca65f97 (diff) | |
| download | x-b75b53a6ac5653a79fc1c7bf95b9fb8a85ce71de.tar.xz x-b75b53a6ac5653a79fc1c7bf95b9fb8a85ce71de.zip | |
entropy
| -rw-r--r-- | entropy/doc.go | 2 | ||||
| -rw-r--r-- | entropy/shannon.go | 24 | ||||
| -rw-r--r-- | entropy/shannon_test.go | 56 |
3 files changed, 82 insertions, 0 deletions
// File: entropy/doc.go (package entropy)
//
// Package entropy has a few helper functions to calculate the bit entropy of strings.

// File: entropy/shannon.go (package entropy; imports "math")

// Shannon estimates the information content of value in bits.
//
// It computes the per-symbol Shannon entropy H = -sum(p * log2(p)) over the
// distinct runes of value, rounds H up to a whole number of bits, and
// multiplies by the number of runes in value. The empty string yields 0.
//
// Symbols are Unicode code points (runes), not bytes: both the frequency
// table and the symbol count are derived from the same range loop, so the
// probabilities always sum to 1 even for multi-byte UTF-8 input.
//
// See http://bearcave.com/misl/misl_tech/wavelets/compression/shannon.html for the algorithmic explanation.
func Shannon(value string) (bits int) {
	frq := make(map[rune]float64)

	// Tally how often each rune occurs and how many runes there are in total.
	// len(value) would report bytes and skew the probabilities for any
	// non-ASCII input, so the total is counted here instead.
	var n int
	for _, r := range value {
		frq[r]++
		n++
	}

	if n == 0 {
		return 0
	}

	var sum float64
	for _, count := range frq {
		p := count / float64(n)
		sum += p * math.Log2(p)
	}

	// sum is <= 0; negate it to obtain the entropy per symbol.
	return int(math.Ceil(-sum)) * n
}

// File: entropy/shannon_test.go (package entropy; imports "testing")

// isHighEntropy reports whether bits reaches the 128-bit threshold that the
// tests treat as "high entropy".
func isHighEntropy(bits int) bool {
	return bits >= 128
}

// TestShannon checks that Shannon classifies a range of sample strings on the
// expected side of the high-entropy threshold.
func TestShannon(t *testing.T) {
	var cases = []struct {
		input       string
		highEntropy bool
	}{
		{
			input:       "AAAAAAAAAAA",
			highEntropy: false,
		},
		{
			input:       "0",
			highEntropy: false,
		},
		{
			input:       "false",
			highEntropy: false,
		},
		{
			input:       "668108162888",
			highEntropy: false,
		},
		{
			input:       "0127B6-85D8BD-E21ADE",
			highEntropy: false,
		},
		{
			input:       "ZmYwOTZmNmQyNWFjMWY4ZGY4MDBjNjQ3N2IwOGMxMDY4NTE1ODFjMjhlZmRjZGNmZmE2ZTM2MTQ4NjA2YTFkNDM2MDljZjc1MDFhODgxOTI0NGZmMmNmNmE1NWEyNDEzNmJjMWQxZmVkMmUwZmQ4ZDc5ODdiMjhiNzU4ZWUzYWYK",
			highEntropy: true,
		},
	}

	for _, c := range cases {
		t.Run(c.input, func(t *testing.T) {
			bits := Shannon(c.input)
			ent := isHighEntropy(bits)

			t.Logf("entropy is: %d", bits)

			if ent != c.highEntropy {
				t.Errorf("%q was expected to be high entropy: %v got: %v", c.input, c.highEntropy, ent)
			}
		})
	}
}
