001 (ns org.clojars.punit-naik.clj-ml.utils.generic
002 (:require [clojure.string :refer [join split]]))
003
(defn first-n-zeros
  "Returns how many consecutive zero elements `coll` starts with.
  Scanning stops at the first non-zero element; an empty (or nil)
  collection yields 0."
  [coll]
  (->> coll
       (take-while zero?)
       count))
015
(defn mean-coll
  "Returns the arithmetic mean of the numbers in collection `c` as a double.
  The sum of the elements is divided by the element count, so an empty
  collection raises a divide-by-zero error."
  [c]
  (let [total (reduce + c)
        size  (count c)]
    (double (/ total size))))
020
(defn replace-nth
  "Returns a lazy sequence equal to `coll` with the item at index `n`
  swapped for `replacement`.
  The result is built from the first `n` items, then `replacement`,
  then everything after index `n`."
  [coll n replacement]
  (let [before (take n coll)
        after  (drop (inc n) coll)]
    (concat before (cons replacement after))))
028
(defn rationalise
  "Rationalises a number into a `[numerator denominator]` pair, similar to
  `clojure.core/rationalize`, but the numerator is always the decimal digits
  of `n` with the decimal point removed, so the denominator is a power of 10.
  E.g. 2.5 => [25 10], 0.125 => [125 1000].

  Integers, values with no fractional part, and non-finite values (NaN,
  infinities) are returned unchanged as `[n 1]`.

  The digits are taken from the exact decimal value of `(str n)` via
  `BigDecimal`, so inputs that print in scientific notation (e.g. 1.0E-5 or
  1.23456785E7) are handled correctly. The numerator must fit in a long."
  [n]
  (if (or (integer? n)
          ;; NaN / infinity have no decimal expansion; pass them through.
          (and (float? n)
               (let [x (double n)]
                 (or (Double/isNaN x) (Double/isInfinite x))))
          ;; `rem` avoids the 32-bit overflow that `(int n)` would hit.
          (zero? (rem n 1)))
    [n 1]
    (let [exact  (.stripTrailingZeros (BigDecimal. (str n)))
          ;; Number of digits after the decimal point in plain notation.
          places (.scale exact)]
      [(long (.unscaledValue exact))
       ;; `*'` promotes instead of overflowing for very small magnitudes.
       (apply *' (repeat places 10))])))
043
(defn shingles
  "Generates shingles (overlapping windows of `n` consecutive items, each
  joined into a string) out of a string `s`. Also works with other
  collections; their items are concatenated with `str`.

  When `n` is 1, a lazy sequence of the individual items as strings is
  returned. Otherwise a vector of shingles is returned, one per start
  position; it is empty when `s` has fewer than `n` items."
  [s n]
  (if (= n 1)
    (map str s)
    (let [window-count (- (count s) (dec n))]
      ;; Walk successive tails of `s` rather than re-dropping from the
      ;; start for every window, which is quadratic on strings.
      (->> (iterate rest (seq s))
           (take window-count)
           (mapv #(reduce str (take n %)))))))
058
(defn approximate-decimal
  "Given a number `num`, returns it as a double truncated (not rounded) to at
  most `n` digits after the decimal point. `n` defaults to 5 and is treated
  as 0 when negative.

  NaN and infinite values are returned unchanged.

  Truncation is done on the exact decimal value via `BigDecimal`, so numbers
  whose string form uses scientific notation (e.g. 1.23456789E-5) are
  truncated at the right position."
  ([num] (approximate-decimal num 5))
  ([num n]
   (let [num (double num)]
     (if (or (Double/isNaN num) (Double/isInfinite num))
       num
       (-> (BigDecimal. (str num))
           ;; RoundingMode/DOWN truncates towards zero, i.e. it simply
           ;; discards the extra digits.
           (.setScale (int (max n 0)) java.math.RoundingMode/DOWN)
           (.doubleValue))))))
068
(defn error-decimal
  "Given a precision `n` (an integer), returns the matching error value:
  a double written as `0.` followed by `n` zeros and a trailing 1,
  e.g. 2 => 0.001."
  [n]
  (let [zeros (apply str (take n (repeat 0)))]
    (Double/parseDouble (str "0." zeros 1))))
073
(defn round-decimal
  "Snaps `num` to the nearest whole number when it is very close to one.

  Only values with at least 5 digits after the decimal point are considered
  (a fixed precision of 5 for now). For those:
    - fractional part >= 0.998 : moved away from zero to the next whole number
    - fractional part <= 0.001 : fractional part dropped
    - otherwise                : value is kept (rebuilt as whole + fraction)
  The result in these cases is a double. Any other input is returned
  unchanged.

  Finite floating-point inputs are first converted to plain decimal notation,
  so values that print in scientific notation (e.g. 1.23456E-5 or
  1.234567899999E7) are split at the real decimal point."
  [num]
  (let [finite-float? (and (float? num)
                           (not (Double/isNaN (double num)))
                           (not (Double/isInfinite (double num))))
        ;; `(str num)` may be in scientific notation; BigDecimal gives the
        ;; plain form, and stripping zeros keeps the digit count honest.
        text (if finite-float?
               (.toPlainString (.stripTrailingZeros (BigDecimal. (str num))))
               (str num))
        [l t] (split text #"\.")]
    (if (and (>= (count t) 5) ; Choosing for precision of 5 digits after decimal for now
             (not (zero? (Double/parseDouble t))))
      (let [fraction   (Double/parseDouble (str "0." t))
            ;; How far (in percent) the fraction is from the next whole number.
            percentage (* (- 1.0 fraction) 100.0)
            negative?  (re-find #"\-" (str l))
            ;; Magnitude of the integer part, sign removed.
            whole      (Double/parseDouble (last (split (str l) #"\-")))
            sign       (if negative? -1 1)]
        (cond
          (<= percentage 0.2)  (* sign (inc whole))
          (>= percentage 99.9) (* sign whole)
          :else                (* sign (+ whole fraction))))
      num)))