/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * Class that can run the hierarchical clustering algorithm with the given
 * parameters.
 *
 * @param distance
 *        Function that should return the distance between two items.
 *        Defaults to clusterlib.euclidean_distance.
 * @param merge
 *        Function that should take in two items and return a merged one.
 *        Defaults to clusterlib.average_linkage.
 * @param threshold
 *        The maximum distance between two items for which their clusters
 *        can be merged.
 */
function HierarchicalClustering(distance, merge, threshold) {
  this.distance = distance || clusterlib.euclidean_distance;
  this.merge = merge || clusterlib.average_linkage;
  this.threshold = threshold == undefined ? Infinity : threshold;
}

HierarchicalClustering.prototype = {
  /**
   * Run the hierarchical clustering algorithm on the given items to produce
   * a final set of clusters.  Uses the parameters set in the constructor.
   *
   * @param items
   *        An array of "things" to cluster - this is the domain-specific
   *        collection you're trying to cluster (colors, points, etc.)
   * @param snapshotGap
   *        How many iterations of the clustering algorithm to wait between
   *        calling the snapshotCallback
   * @param snapshotCallback
   *        If provided, will be called as clusters are merged to let you view
   *        the progress of the algorithm.  Passed the current array of
   *        clusters, cached distances, and cached closest clusters.
   *
   * @return An array of merged clusters.  The represented item can be
   *         found in the "item" property of the cluster.
   */
  cluster: function HC_cluster(items, snapshotGap, snapshotCallback) {
    // array of all remaining clusters
    let clusters = [];
    // 2D matrix of distances between each pair of clusters, indexed by key
    let distances = [];
    // closest cluster key for each cluster, indexed by key
    let neighbors = [];
    // an array of all clusters, but indexed by key
    let clustersByKey = [];

    // set up clusters from the initial items array
    for (let index = 0; index < items.length; index++) {
      let cluster = {
        // the item this cluster represents
        item: items[index],
        // a unique key for this cluster, stays constant unless merged itself
        key: index,
        // index of cluster in clusters array, can change during any merge
        index: index,
        // how many clusters have been merged into this one
        size: 1
      };
      clusters[index] = cluster;
      clustersByKey[index] = cluster;
      distances[index] = [];
      neighbors[index] = 0;
    }

    // initialize distance matrix and cached neighbors
    for (let i = 0; i < clusters.length; i++) {
      for (let j = 0; j <= i; j++) {
        var dist = (i == j) ? Infinity :
          this.distance(clusters[i].item, clusters[j].item);
        distances[i][j] = dist;
        distances[j][i] = dist;

        if (dist < distances[i][neighbors[i]]) {
          neighbors[i] = j;
        }
      }
    }

    // merge the next two closest clusters until none of them are close enough
    let next = null, i = 0;
    for (; next = this.closestClusters(clusters, distances, neighbors); i++) {
      if (snapshotCallback && (i % snapshotGap) == 0) {
        snapshotCallback(clusters);
      }
      this.mergeClusters(clusters, distances, neighbors, clustersByKey,
                         clustersByKey[next[0]], clustersByKey[next[1]]);
    }
    return clusters;
  },

  /**
   * Once we decide to merge two clusters in the cluster method, actually
   * merge them.  Alters the given state of the algorithm.
   *
   * @param clusters
   *        The array of all remaining clusters
   * @param distances
   *        Cached distances between pairs of clusters
   * @param neighbors
   *        Cached closest clusters
   * @param clustersByKey
   *        Array of all clusters, indexed by key
   * @param cluster1
   *        First cluster to merge
   * @param cluster2
   *        Second cluster to merge
   */
  mergeClusters: function HC_mergeClus(clusters, distances, neighbors,
                                       clustersByKey, cluster1, cluster2) {
    let merged = { item: this.merge(cluster1.item, cluster2.item),
                   left: cluster1,
                   right: cluster2,
                   key: cluster1.key,
                   size: cluster1.size + cluster2.size };

    clusters[cluster1.index] = merged;
    clusters.splice(cluster2.index, 1);
    clustersByKey[cluster1.key] = merged;

    // update distances with new merged cluster
    for (let i = 0; i < clusters.length; i++) {
      var ci = clusters[i];
      var dist;
      if (cluster1.key == ci.key) {
        dist = Infinity;
      } else if (this.merge == clusterlib.single_linkage) {
        dist = distances[cluster1.key][ci.key];
        if (distances[cluster1.key][ci.key] >
            distances[cluster2.key][ci.key]) {
          dist = distances[cluster2.key][ci.key];
        }
      } else if (this.merge == clusterlib.complete_linkage) {
        dist = distances[cluster1.key][ci.key];
        if (distances[cluster1.key][ci.key] <
            distances[cluster2.key][ci.key]) {
          dist = distances[cluster2.key][ci.key];
        }
      } else if (this.merge == clusterlib.average_linkage) {
        dist = (distances[cluster1.key][ci.key] * cluster1.size
              + distances[cluster2.key][ci.key] * cluster2.size)
              / (cluster1.size + cluster2.size);
      } else {
        dist = this.distance(ci.item, cluster1.item);
      }

      distances[cluster1.key][ci.key] = distances[ci.key][cluster1.key]
                                      = dist;
    }

    // update cached neighbors
    for (let i = 0; i < clusters.length; i++) {
      var key1 = clusters[i].key;
      if (neighbors[key1] == cluster1.key ||
          neighbors[key1] == cluster2.key) {
        let minKey = key1;
        for (let j = 0; j < clusters.length; j++) {
          var key2 = clusters[j].key;
          if (distances[key1][key2] < distances[key1][minKey]) {
            minKey = key2;
          }
        }
        neighbors[key1] = minKey;
      }
      clusters[i].index = i;
    }
  },

  /**
   * Given the current state of the algorithm, return the keys of the two
   * clusters that are closest to each other so we know which ones to merge
   * next.
   *
   * @param clusters
   *        The array of all remaining clusters
   * @param distances
   *        Cached distances between pairs of clusters
   * @param neighbors
   *        Cached closest clusters
   *
   * @return An array of two keys of clusters to merge, or null if there are
   *         no more clusters close enough to merge
   */
  closestClusters: function HC_closestClus(clusters, distances, neighbors) {
    let minKey = 0, minDist = Infinity;
    for (let i = 0; i < clusters.length; i++) {
      var key = clusters[i].key;
      if (distances[key][neighbors[key]] < minDist) {
        minKey = key;
        minDist = distances[key][neighbors[key]];
      }
    }
    if (minDist < this.threshold) {
      return [minKey, neighbors[minKey]];
    }
    return null;
  }
};

var clusterlib = {
  hcluster: function hcluster(items, distance, merge, threshold, snapshotGap,
                              snapshotCallback) {
    return (new HierarchicalClustering(distance, merge, threshold))
           .cluster(items, snapshotGap, snapshotCallback);
  },

  single_linkage: function single_linkage(cluster1, cluster2) {
    return cluster1;
  },

  complete_linkage: function complete_linkage(cluster1, cluster2) {
    return cluster1;
  },

  average_linkage: function average_linkage(cluster1, cluster2) {
    return cluster1;
  },

  euclidean_distance: function euclidean_distance(v1, v2) {
    let total = 0;
    for (let i = 0; i < v1.length; i++) {
      total += Math.pow(v2[i] - v1[i], 2);
    }
    return Math.sqrt(total);
  },

  manhattan_distance: function manhattan_distance(v1, v2) {
    let total = 0;
    for (let i = 0; i < v1.length; i++) {
      total += Math.abs(v2[i] - v1[i]);
    }
    return total;
  },

  max_distance: function max_distance(v1, v2) {
    let max = 0;
    for (let i = 0; i < v1.length; i++) {
      max = Math.max(max, Math.abs(v2[i] - v1[i]));
    }
    return max;
  }
};