001 (ns org.clojars.punit-naik.clj-ml.pca
002 (:require [org.clojars.punit-naik.clj-ml.utils.matrix :as mu]))
003
(defn principal-components
  "Projects labelled data onto its top `n` principal components.

  `data` is a sequence of rows; the last element of every row is treated as
  the label and all preceding elements as features.

  The covariance matrix of the features is computed, its eigenvalues and
  eigenvectors are obtained through the matrix utils, and the eigenpairs are
  ordered by descending eigenvalue. The features are then multiplied (as
  given; no centering is applied in this function) by each of the first `n`
  eigenvectors.

  Returns a lazy sequence with one vector per input row: the projected values
  (one per selected component) followed by that row's label. When no
  component is selected (`n` is zero or negative, or there are no eigenpairs)
  every returned vector contains only the label."
  [data n]
  (let [feature-data (map butlast data)
        label-data (map last data)
        covar-feature-data (mu/covariance feature-data)
        evals (mu/eigen-values covar-feature-data)
        ;; NOTE(review): pairing below assumes `mu/eigen-vectors` returns the
        ;; vectors in the same order as `evals` -- confirm in the matrix utils.
        evecs (mu/eigen-vectors covar-feature-data evals)
        evals-evecs (sort-by first > (map vector evals evecs))
        ;; One product per selected component (presumably a single-column
        ;; matrix with one row per data row -- confirm in the matrix utils).
        projections (map (fn [[_ evec]]
                           (mu/matrix-multiply feature-data (mu/transpose [evec])))
                         (take n evals-evecs))
        ;; `(apply map vector ())` would yield a transducer rather than a
        ;; sequence and make the final `map` throw, so fall back to empty rows
        ;; when nothing was selected.
        projected-rows (if (seq projections)
                         (apply map vector projections)
                         (repeat []))]
    ;; `flatten` collapses the nested per-component values of a row into one
    ;; flat vector before the label is appended at the end.
    (map (fn [label row]
           (conj (vec (flatten row)) label))
         label-data
         projected-rows)))