Comp202: Principles of Object-Oriented Programming II
Fall 2004 -- Lecture #23: Dictionary and Hash Table   


Dictionary

A major theme in computing is storage/retrieval/removal: store data somewhere so that it can later be retrieved, and discarded when it is no longer needed, all in the most efficient manner.  The abstraction of these computing activities is embodied in the notion of what is called a dictionary, expressed in Java as an interface as follows.

 

DictionaryPair.java
package dict;

import java.lang.*;

/**
 * Represents a (key, value) pair stored in a dictionary,
 * where key is a Comparable.
 */
public class DictionaryPair implements Comparable {
    private Comparable _key;
    private Object _value;
    
    /**
     * Initializes this DictionaryPair to a given (key, value) pair.
     */
    public DictionaryPair(Comparable key, Object value) {
        _key   = key;
        _value = value;
    }
    
    /**
     * Compares the key of this DictionaryPair against the key of the
     * other DictionayPair.
     * @param other a DictionaryPair
     */
    public int compareTo(Object other) {
        return _key.compareTo(((DictionaryPair)other)._key);
    }
    
    /**
     * Returns the key of this DictionaryPair.
     */
    public Comparable getKey() {
        return _key;
    }
    
    /**
     * Returns the value of this DictionaryPair.
     */
    public Object getValue() {
        return _value;
    }
    
    /**
     * Shows "(", followed by the String representation of the key, followed by
     * a ",", followed by the String representation of the associcated value,
     * followed by a ")".
     */
    public String toString() {
        return "(" + _key + "," + _value + ")";
    }
}

IDictionary.java
package dict;

import java.lang.*;
import listFW.*;

/**
 * The contract of a simple dictionary: a container of (key, value) pairs
 * that supports storing, retrieving and removing a pair by its key.
 */
public interface IDictionary {
    /**
     * Discards every pair, leaving the dictionary empty.
     */
    void clear();

    /**
     * Tells whether the dictionary currently holds no pairs.
     * Non OO!
     * How can we eliminate this check?
     * @return true if the dictionary is empty, false otherwise
     */
    boolean isEmpty();

    /**
     * Tells whether the dictionary is full.
     * @return true if the dictionary is full, false otherwise
     */
    boolean isFull();

    /**
     * Produces an IList of DictionaryPairs holding the entire contents of
     * the dictionary.
     * @param lf a factory to manufacture IList objects.
     * @return the list of all DictionaryPairs in the dictionary
     */
    IList elements(IListFactory lf);

    /**
     * Finds the DictionaryPair with the given key.
     *
     * The whole DictionaryPair is returned, rather than the value alone,
     * so that the caller can tell "key not found" (null) apart from
     * "found the pair (key, null)".
     * @param key the key to search for
     * @return the matching DictionaryPair, or null if there is none
     */
    DictionaryPair lookup(Comparable key);

    /**
     * Stores the given key and value.  If the key is already in the
     * dictionary, the given value replaces the key's old value.
     * @param key the key to store
     * @param value the value to associate with the key
     */
    void insert(Comparable key, Object value);

    /**
     * Deletes the DictionaryPair with the given key and hands it back.
     * @param key the key to delete
     * @return the removed DictionaryPair, or null if there is no
     *         DictionaryPair with the given key
     */
    DictionaryPair remove(Comparable key);
}

IDictionary is an example of what we call an unrestricted access container (as opposed to a restricted access container).  A simple way to implement IDictionary is to use an LRStruct.  

DictLRS.java
package dict;

import java.lang.*;
import lrs.*;
import listFW.*;
import listFW.factory.*;

/**
 * An IDictionary whose DictionaryPairs are kept in an LRStruct, sorted by
 * key.  Every operation is carried out by a visitor (IAlgo) on that
 * LRStruct.
 */
public class DictLRS implements IDictionary {

    /**
     * Visitor answering whether its host is empty.
     * Stateless, so a single instance is shared by all DictLRS objects.
     * Non OO!
     */
    private static final IAlgo IS_EMPTY = new IAlgo() {
        public Object emptyCase(LRStruct host, Object input) {
            return Boolean.TRUE;
        }

        public Object nonEmptyCase(LRStruct host, Object input) {
            return Boolean.FALSE;
        }
    };

    /*
     * The DictionaryPairs, in ascending key order.
     */
    private LRStruct _lrs = new LRStruct();

    /**
     * Empties the dictionary by swapping in a brand new, empty LRStruct
     * for the current one.
     */
    public void clear() {
        this._lrs = new LRStruct();
    }

    /**
     * Tells whether the dictionary holds no pairs, by running the shared
     * emptiness visitor on the LRStruct.
     */
    public boolean isEmpty() {
        Boolean answer = (Boolean) _lrs.execute(IS_EMPTY, null);
        return answer.booleanValue();
    }

    /**
     * This implementation never reports itself as full.
     * @return false always
     */
    public boolean isFull() {
        return false;
    }

    /**
     * Copies the contents of the dictionary into an IList of
     * DictionaryPairs built with the supplied factory.
     *
     * A visitor walks the LRStruct and rebuilds it, element by element,
     * as an IList.
     * @param lf a factory to manufacture IList objects.
     */
    public IList elements(final IListFactory lf) {
        IAlgo copier = new IAlgo() {
            public Object emptyCase(LRStruct host, Object input) {
                return lf.makeEmptyList();
            }

            public Object nonEmptyCase(LRStruct host, Object input) {
                Object head = host.getFirst();
                IList tail = (IList) host.getRest().execute(this, input);
                return lf.makeNEList(head, tail);
            }
        };
        return (IList) _lrs.execute(copier, null);
    }

    /**
     * Finds the DictionaryPair with the given key, or returns null when no
     * pair has that key.
     *
     * The pair itself is returned, not just its value, so that callers can
     * tell a missing key apart from a stored pair (key, null).
     *
     * A visitor scans the sorted LRStruct and gives up as soon as it meets
     * a key larger than the one sought.
     */
    public DictionaryPair lookup(Comparable key) {
        IAlgo finder = new IAlgo() {
            public Object emptyCase(LRStruct host, Object input) {
                return null;
            }

            public Object nonEmptyCase(LRStruct host, Object input) {
                DictionaryPair head = (DictionaryPair) host.getFirst();
                int order = head.getKey().compareTo(input);

                if (order == 0) {
                    // head's key matches the key sought
                    return head;
                }
                if (order > 0) {
                    // head's key is already past the key sought: not here
                    return null;
                }
                // head's key is smaller: keep searching down the list
                return host.getRest().execute(this, input);
            }
        };
        return (DictionaryPair) _lrs.execute(finder, key);
    }

    /**
     * Stores the given key and value.  When the key is already present,
     * the given value takes the place of the key's old value.
     *
     * A visitor walks the LRStruct and places the new pair at its sorted
     * position.
     */
    public void insert(Comparable key, Object value) {
        IAlgo inserter = new IAlgo() {
            public Object emptyCase(LRStruct host, Object input) {
                return host.insertFront(input);
            }

            public Object nonEmptyCase(LRStruct host, Object input) {
                DictionaryPair head = (DictionaryPair) host.getFirst();
                int order = head.compareTo(input);

                if (order < 0) {
                    // head sorts before the new pair: move on
                    return host.getRest().execute(this, input);
                }
                if (order == 0) {
                    // same key: overwrite the stored pair
                    return host.setFirst(input);
                }
                // head sorts after the new pair: the new pair goes here
                return host.insertFront(input);
            }
        };
        _lrs.execute(inserter, new DictionaryPair(key, value));
    }

    /**
     * Deletes the DictionaryPair with the given key and returns it, or
     * returns null when no pair has that key.
     *
     * A visitor scans the sorted LRStruct for the key.
     */
    public DictionaryPair remove(Comparable key) {
        IAlgo remover = new IAlgo() {
            public Object emptyCase(LRStruct host, Object input) {
                return null;
            }

            public Object nonEmptyCase(LRStruct host, Object input) {
                DictionaryPair head = (DictionaryPair) host.getFirst();
                int order = head.getKey().compareTo(input);

                if (order < 0) {
                    // head's key is smaller: keep searching down the list
                    return host.getRest().execute(this, input);
                }
                if (order > 0) {
                    // head's key is already past the key sought: not here
                    return null;
                }
                // head's key matches: unlink it and hand it back
                host.removeFront();
                return head;
            }
        };
        return (DictionaryPair) _lrs.execute(remover, key);
    }

    /**
     * Uses the String representation of the underlying LRStruct.
     */
    public String toString() {
        return _lrs.toString();
    }
}

The problem with such an implementation is that each of the operations insert, lookup and remove takes O(N) time in the worst case, where N is the total number of elements in the dictionary.  Using a self-balancing tree will guarantee O(log N) time.  

Can we do better than that?  The answer is yes and no.  With a data structure called a "hash table", coupled with an appropriate "hash function", we can achieve an amortized performance of O(1), that is, constant time!

Hash Tables and Hash Functions


Hash Tables


The Problem: Collisions


Chaining


Performance


Hash Table Implementations

package dict;

import java.lang.*;
import lrs.*;
import listFW.*;

/**
 * An IDictionary implemented using a hash table.  Collisions are handled
 * using chaining.  The chains are implemented using DictLRS.
 *
 * Uses the method hashCode() defined by class Object as the hash
 * function.  Any class may override this method with a new
 * implementation.
 *
 * @author Alan L. Cox
 * @since 03/28/03
 */
public class DictHash implements IDictionary {
    /*
     * Initialize _table to reference a single-element array of
     * IDictionary, containing in its single element a reference to an
     * empty DictLRS.
     */
    private IDictionary[] _table = { new DictLRS() };
    private int _tableOccupancy = 0;

     /*
     * An IList factory used to linearalize each internal DictLRS and
     * resize the _table array.

     */
    private IListFactory _lf;
    
     /*

     * An upper bound on the load factor.
     */    
    private double _loadFactor;

    public DictHash(IListFactory lf, double loadFactor) {
        _lf = lf;

        _loadFactor = loadFactor;
    }

 

    /**
     * Clears the contents of the dictionary leaving it empty.
     *
     * Implemented by replacing the existing LRStruct with a new,
     * empty one.
     */
    public void clear() {
        _table = new DictLRS[1];
        _table[0] = new DictLRS();
        _tableOccupancy = 0;
    }

    /**
     * Returns true if the dictionary is empty and false otherwise.
     */
    public boolean isEmpty() {
        return _tableOccupancy == 0;
    }

    /**
     * Returns an IList of DictionaryPairs corresponding to the entire
     * contents of the dictionary.
     *
     * Note that the elements are not in order.
     */
    public IList elements(IListFactory lf) {
        IList l = lf.makeEmptyList();

        for (int i = 0; i < _table.length; i++)
            l = (AList)_table[i].elements(lf).execute(new IListAlgo() {
                public Object emptyCase(IEmptyList host, Object input) {
                    return input;
                }

                public Object nonEmptyCase(INEList host, Object input) {
                    return lf.makeNEList(host.getFirst(),
                                         (IList)host.getRest().execute(this,
                                                                       input));
                }
            }, l);

        return l;
    }

    /**
     * Returns the DictionaryPair with the given key.  If there is not
     * a DictionaryPair with the given key, returns null.
     *
     * Returns a DictionaryPair rather than the value alone so that
     * the user can distinguish between not finding the key and
     * finding the pair (key, null).
     *
     * This method is O(1) in the expected case and O(n) in the worst
     * case.
     *
     * @param key the key to lookup
     * @return the DictionaryPair found
     */
    public DictionaryPair lookup(Comparable key) {
        int index = key.hashCode() % _table.length;

        return _table[index].lookup(key);
    }

    /**
     * Inserts the given key and value.  If the given key is already
     * in the dictionary, the given value replaces the key's old
     * value.
     *
     * This method is O(1) in both the expected case and the worst
     * case if we amortize the cost of doubling the hash table over
     * subsequent insert()'s.
     *
     * @param key the key to insert
     * @param value the value to insert
     */
    public void insert(Comparable key, Object value) {
        if (_tableOccupancy >= (_loadFactor * _table.length)) {
            int i;

            final IDictionary newTable[] = new IDictionary[2*_table.length];

            for (i = 0; i < newTable.length; i++)
                newTable[i] = new DictLRS();

            for (i = 0; i < _table.length; i++) {
                _table[i].elements(_lf).execute(new IListAlgo() {
                    public Object emptyCase(AList host, Object input) {
                        return null;
                    }

                    public Object nonEmptyCase(AList host, Object input) {
                        DictionaryPair pair = (DictionaryPair) host.getFirst();
                        int index = pair.getKey().hashCode() % newTable.length;

                        newTable[index].insert(pair.getKey(), pair.getValue());

                        return host.getRest().execute(this, input);
                    }
                }, null);
            }
            _table = newTable;
        }
        int index = key.hashCode() % _table.length;

        _tableOccupancy++;
        _table[index].insert(key, value);
    }

    /**
     * Removes the DictionaryPair with the given key and returns it.
     * If there is not a DictionaryPair with the given key, returns
     * null.
     *
     * This method is O(1) in the expected case and O(n) in the worst
     * case.
     *
     * @param key the key to remove
     * @return the DictionaryPair removed
     */
    public DictionaryPair remove(Comparable key) {
        int index = key.hashCode() % _table.length;

        DictionaryPair pair = _table[index].remove(key);

        if (pair != null)
            _tableOccupancy--;

        return pair;
    }

    /**
     * Returns a string representing the contents of the dictionary.
     */
    public String toString() {
        return elements(_lf).toString();
    }
}


Last Revised Thursday, 03-Jun-2010 09:52:09 CDT

©2004 Stephen Wong and Dung Nguyen