001 (ns org.clojars.punit-naik.clj-ml.utils.io
002 (:require [clojure.java.io :as io]
003 [clojure.string :as clj-str])
004 (:import [java.io BufferedReader]))
005
(defn find-field-names
  "Finds the names of the fields of an XSV file.
  Reads only the first line of the file at `file-path`, splits it on
  `delimiter` (a regex pattern) and returns a sequence in which every piece
  has been lower-cased and converted to a keyword.
  Returns an empty sequence when the file contains no lines.
  The underlying reader is always closed before this function returns."
  [file-path delimiter]
  ;; `io/reader` already yields a BufferedReader, so no extra wrapping is needed.
  ;; The header line is fully read inside `with-open`; everything after that
  ;; works on an in-memory string, so closing the reader early is safe.
  (let [header (with-open [rdr (io/reader file-path)]
                 (first (line-seq rdr)))]
    (if (nil? header)
      ()
      (map #(-> % clj-str/lower-case keyword)
           (-> header clj-str/trim-newline (clj-str/split delimiter))))))
012
(defn read-file-lines
  "Reads a file from the `file-path` and returns a lazy sequence of strings,
  one per line.
  The file is opened immediately; lines are read on demand. The reader is
  closed automatically once the sequence has been consumed to its end, so
  callers that walk the whole sequence do not leak a file handle. A sequence
  that is abandoned half-way keeps its reader open until it is garbage
  collected."
  [file-path]
  ;; `io/reader` already returns a BufferedReader, so no extra wrapping is needed.
  (let [^BufferedReader rdr (io/reader file-path)
        step (fn step []
               (lazy-seq
                (if-let [line (.readLine rdr)]
                  (cons line (step))
                  ;; End of input: release the file handle and end the seq.
                  (do (.close rdr) nil))))]
    (step)))
017
(defn row->map
  "Takes `rows` as a collection, fetches its first row,
  splits it on `delimiter` (a regex pattern),
  converts the resulting values into a map keyed by `field-names`,
  and finally appends that map to the end of `result`.
  Returns a sequence: the items of `result` followed by the new map.
  Trailing empty fields are kept, so a row such as `1,` maps its last
  column to the empty string instead of dropping the key.
  When `rows` is empty, the items of `result` are returned unchanged."
  [rows delimiter field-names result]
  (if-let [row (first rows)]
    ;; A limit of -1 stops `split` from discarding trailing empty strings.
    (let [values (clj-str/split row delimiter -1)]
      (concat result [(zipmap field-names values)]))
    (concat result)))
030
(defn xsv-reader
  "Reads an XSV file and generates a collection of maps where each map in the collection
  represents a row with the keys as the column names and its values as the row's values
  NOTE: X represents a delimiter
  `file-path` is the XSV file's path
  `has-field-names?` tells whether the first line of the file is a header;
  when truthy that line is skipped and not returned as data. Defaults to `true`
  `delimiter` is the delimiter pattern used in the XSV file, the default value of it is `,`
  `field-names` is the collection of column names of the XSV file
  If it is not supplied, the function will read it from the first row of the file
  (this happens regardless of `has-field-names?`)
  The whole file is read eagerly and a sequence of maps is returned."
  ([file-path] (xsv-reader file-path true))
  ([file-path has-field-names?] (xsv-reader file-path has-field-names? #","))
  ([file-path has-field-names? delimiter]
   (xsv-reader file-path has-field-names? delimiter
               (find-field-names file-path delimiter)))
  ([file-path has-field-names? delimiter field-names]
   (let [lines (cond->> (read-file-lines file-path)
                 has-field-names? (drop 1))]
     ;; Each line is converted on its own (appended to an empty result) and the
     ;; one-element results are flattened with `mapcat`. Threading one growing
     ;; `concat` through a loop would nest a lazy seq per row and overflow the
     ;; stack on large files once the result is realised.
     (doall (mapcat #(row->map [%] delimiter field-names nil) lines)))))
053
(defn write-xsv
  "Takes a file (path, name and extension) and
  xsv-data (a collection of already delimited lines, one item per row) and
  writes them to the file separated by newlines.
  No newline is written after the last row. Every item is converted with
  `str` before being written. The writer is closed when done. Returns nil."
  [file xsv-data]
  (with-open [^java.io.Writer writer (io/writer file)]
    ;; `map-indexed` numbers the rows without forcing the whole input up front.
    (doseq [[idx line] (map-indexed vector xsv-data)]
      ;; Every row goes through `str`, the first one included, so a non-string
      ;; first row can no longer be dispatched to `Writer.write(int)`.
      (.write writer ^String (str (when (pos? idx) "\n") line)))))
062
(defn maps->xsv-data
  "Takes a `delimiter` string and a collection of maps and returns xsv-data:
  a vector of strings whose first item is the header line and whose remaining
  items are one delimited line per map.
  The columns, and their order, are taken from the keys of the first map.
  Every row is laid out in that same column order; a key missing from a map
  yields an empty field."
  [delimiter maps]
  (let [columns (-> maps first keys)
        headers (clj-str/join delimiter (map name columns))
        ;; Look values up by column instead of using `(vals m)`, whose order
        ;; is not guaranteed to match the header for every map.
        row->line (fn [m] (clj-str/join delimiter (map #(get m %) columns)))]
    (into [headers] (map row->line maps))))
071
(defn write-xsv-from-maps
  "Writes a collection of maps to `file` (path, name and extension) as XSV.
  The maps are first turned into xsv-data with `maps->xsv-data` and the
  result is then handed to `write-xsv`.
  Optionally provide a `delimiter`; it defaults to `,`"
  ([file maps]
   (write-xsv-from-maps file maps ","))
  ([file maps delimiter]
   (let [xsv-lines (maps->xsv-data delimiter maps)]
     (write-xsv file xsv-lines))))