aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristine Dodrill <me@christine.website>2019-06-14 18:09:39 +0000
committerChristine Dodrill <me@christine.website>2019-06-14 18:09:39 +0000
commitb75b53a6ac5653a79fc1c7bf95b9fb8a85ce71de (patch)
treeda7b05d874bf730c8a96dd9109e6cf5e13ea350e
parent0b87828dc7b052120e93f680c162068e4ca65f97 (diff)
downloadx-b75b53a6ac5653a79fc1c7bf95b9fb8a85ce71de.tar.xz
x-b75b53a6ac5653a79fc1c7bf95b9fb8a85ce71de.zip
entropy
-rw-r--r--entropy/doc.go2
-rw-r--r--entropy/shannon.go24
-rw-r--r--entropy/shannon_test.go56
3 files changed, 82 insertions, 0 deletions
diff --git a/entropy/doc.go b/entropy/doc.go
new file mode 100644
index 0000000..fabe727
--- /dev/null
+++ b/entropy/doc.go
@@ -0,0 +1,2 @@
+// Package entropy has a few helper functions to calculate the bit entropy of strings.
+package entropy
diff --git a/entropy/shannon.go b/entropy/shannon.go
new file mode 100644
index 0000000..699fd91
--- /dev/null
+++ b/entropy/shannon.go
@@ -0,0 +1,24 @@
+package entropy
+
+import "math"
+
+// Shannon estimates the Shannon entropy of a string, in bits.
+//
+// The result is the per-rune entropy of value, rounded up to a whole number of
+// bits, multiplied by the number of runes in value. Frequencies are computed
+// over runes rather than bytes, so multi-byte UTF-8 input is measured on the
+// same footing as ASCII input. The empty string yields 0.
+//
+// See http://bearcave.com/misl/misl_tech/wavelets/compression/shannon.html for the algorithmic explanation.
+func Shannon(value string) (bits int) {
+	frq := make(map[rune]float64)
+
+	// Count occurrences per rune and the total number of runes in one pass.
+	// len(value) is the byte length and would skew the probabilities for any
+	// non-ASCII input, so the total is counted here instead.
+	total := 0
+	for _, r := range value {
+		frq[r]++
+		total++
+	}
+
+	// Accumulate sum(p * log2(p)); every term is <= 0 because 0 < p <= 1.
+	var sum float64
+	for _, count := range frq {
+		p := count / float64(total)
+		sum += p * math.Log2(p)
+	}
+
+	// Negating the sum gives the non-negative entropy per rune.
+	return int(math.Ceil(-sum)) * total
+}
diff --git a/entropy/shannon_test.go b/entropy/shannon_test.go
new file mode 100644
index 0000000..6533aac
--- /dev/null
+++ b/entropy/shannon_test.go
@@ -0,0 +1,56 @@
+package entropy
+
+import "testing"
+
+// isHighEntropy reports whether bits meets the 128-bit threshold that these
+// tests treat as "high entropy".
+func isHighEntropy(bits int) bool {
+	return bits >= 128
+}
+
+// TestShannon runs Shannon over a table of sample strings and checks that the
+// result falls on the expected side of the isHighEntropy threshold. Each input
+// is run as its own subtest, named after the input string.
+func TestShannon(t *testing.T) {
+	type testCase struct {
+		input       string
+		highEntropy bool
+	}
+
+	// highEntropy is left at its zero value (false) for the low-entropy inputs.
+	table := []testCase{
+		{input: "AAAAAAAAAAA"},
+		{input: "0"},
+		{input: "false"},
+		{input: "668108162888"},
+		{input: "0127B6-85D8BD-E21ADE"},
+		{
+			input:       "ZmYwOTZmNmQyNWFjMWY4ZGY4MDBjNjQ3N2IwOGMxMDY4NTE1ODFjMjhlZmRjZGNmZmE2ZTM2MTQ4NjA2YTFkNDM2MDljZjc1MDFhODgxOTI0NGZmMmNmNmE1NWEyNDEzNmJjMWQxZmVkMmUwZmQ4ZDc5ODdiMjhiNzU4ZWUzYWYK",
+			highEntropy: true,
+		},
+	}
+
+	for _, tc := range table {
+		t.Run(tc.input, func(t *testing.T) {
+			bits := Shannon(tc.input)
+			t.Logf("entropy is: %d", bits)
+
+			if got := isHighEntropy(bits); got != tc.highEntropy {
+				t.Errorf("%q was expected to be high entropy: %v got: %v", tc.input, tc.highEntropy, got)
+			}
+		})
+	}
+}