/*
Copyright 2007 David Spencer.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/

package com.tropo.lucene;

import java.io.*;
import java.util.*;
import java.io.InputStream; // lucene clash

import org.apache.lucene.analysis.*;
import org.apache.lucene.index.*;
import org.apache.lucene.store.*;
import org.apache.lucene.document.*;
import org.apache.lucene.analysis.standard.*;


import javax.mail.*;
import javax.mail.event.*;
import javax.mail.*;
import javax.activation.*;
import javax.mail.internet.*;

import com.sun.mail.imap.IMAPFolder;

/**
 * Build an index from an IMAP message store.
 * Need <a href="http://java.sun.com/products/javamail/">Java Mail</a>
 * which in turn needs the
 * <a href="http://java.sun.com/products/javabeans/glasgow/jaf.html">Java Activation Framework</a>.
 */
public final class ImapIndex
{
	/**
	 * Main driver to index an IMAP Store.
	 * Arguments:
	 *
	 * <ul>
	 * <li> <b>-user USER</b> : Mandatory, user on IMAP server. </li>
	 * <li> <b>-pw PASSWORD</b> : Mandatory, users password. </li>
	 * <li> <b>-host HOST</b> :  Mandatory, host name of IMAP server. </li>
	 * <li> <b>-folder FOLDERNAME</b> : Optional, folder name to start with, default is to start at top of tree and index entire message store. </li>
	 * <li> <b>-index INDEX</b> :  Optional index name, default is "imap_index" in current directory. </li>
	 * </ul>
	 *
	 * The process always terminates through <code>System.exit</code>:
	 * 0 on success, 10 after printing the help text, 123 on any fatal error.
	 *
	 * @param args command line arguments as described above
	 */
	public static void main(String[] args)
	{
		// Big win, improves things 60% for me though I may have high latency as
		// I'm in Santa Monica and my IMAP server is in London.
		System.setProperty( "mail.imap.partialfetch", "false");

		try
		{
			String index_name = "imap_index";
			String pw = null;
			String box = null;
			String user = null;
			String host = null;

			// parse args; every option except help consumes the following argument
			for ( int i = 0; i < args.length; i++)
			{
				final String arg = args[ i];
				if ( arg.equals( "-pw"))
				{
					pw = optionValue( args, i++);
				}
				else if ( arg.equals( "-user"))
				{
					user = optionValue( args, i++);
				}
				else if ( arg.equals( "-host"))
				{
					host = optionValue( args, i++);
				}
				else if ( arg.equals( "-folder"))
				{
					box = optionValue( args, i++);
				}
				else if ( arg.equals( "-index"))
				{
					index_name = optionValue( args, i++);
				}
				else if ( arg.equals( "-h") ||
						  arg.indexOf( "help") >= 0)
				{
					o.println( "Syntax is -host HOST -user USER -pw PASSWORD [-index INDEX] [-folder FOLDER]");
					o.println( "-folder and -index are optional, others are needed and can appear in any order");
					System.exit( 10);
				}
				else
				{
					fatal( "Unknown arg: " + arg);
				}
			}

			// validate args
			if ( user == null) fatal( "User name must be set with -user");
			if ( pw == null) fatal( "Password must be set with -pw");
			if ( host == null) fatal( "IMAP server host name  must be set with -host");


			urlPrefix = "imap://" + user + "@" + host + ":143/fetch%3EUID%3E/";

			// open index (the "true" flags create a fresh index, replacing any existing one)
			o.println( "Index: " + index_name);
			Directory d = FSDirectory.getDirectory( index_name, true);
			writer = new IndexWriter( d, analyzer, true);

			// connect to IMAP
			Properties props = System.getProperties();
			Session session = Session.getDefaultInstance(props, null);
			Store store = session.getStore("imap");

			o.println( "Connecting to " + host + " as " + user);
			store.connect( host, user, pw);
			o.println( "Connected");

			// get default or specified folder
			Folder folder;
			if ( box == null)
				folder = store.getDefaultFolder();
			else
				folder = store.getFolder( box);
			if (folder == null || !folder.exists())
				fatal( "Invalid folder: " + box);

			// go thru folder and everything underneath
			traverse( folder);

			long dt = now() - start;
			writer.optimize();
			writer.close();

			// The index is safely on disk at this point, so a failure to
			// close the IMAP connection is only worth a warning.
			try
			{
				store.close();
			}
			catch( MessagingException me)
			{
				err.println( "Warning: could not close IMAP store cleanly: " + me);
			}

			o.println();
			o.println( "All done, bytes read=" + (bytes/1024) + "(kb)" +
					   " / " + (bytes/1024/1024) + "(MB)," +
					   " time=" +(dt/1000L) +  "(s)," +
					   " rate=" + kbPerSecond( bytes, dt) + "(kb/sec)");
			o.println( "Messages added to index: "+ nIndexed);

			if ( badAttachments > 0)
				o.println( "\tbad attachments: "+ badAttachments);
			if ( badDocs > 0)
				o.println( "\tbad docs: "+ badDocs);
			System.exit( 0);
		}
		catch( Throwable t)
		{
			fatal( "Bug detected main", t);
		}
	}

	/**
	 * Return the value following the option at <code>flagIndex</code>,
	 * terminating with a usage error if the option is the last argument.
	 *
	 * @param args      full command line
	 * @param flagIndex position of the option whose value is wanted
	 * @return the argument immediately after the option
	 */
	private static String optionValue( String[] args, int flagIndex)
	{
		if ( flagIndex + 1 >= args.length)
			fatal( "Missing value for option " + args[ flagIndex]);
		return args[ flagIndex + 1];
	}

	/**
	 * Transfer rate in kb/s, or 0 when no measurable time has elapsed
	 * (avoids dividing by zero on very fast runs).
	 *
	 * @param byteCount number of bytes transferred
	 * @param millis    elapsed time in milliseconds
	 */
	private static long kbPerSecond( long byteCount, long millis)
	{
		if ( millis <= 0)
			return 0;
		return (( 1000L * byteCount) / millis) / 1024;
	}

	/**
	 * Visit one folder: skip it if its full name is on the ignore list,
	 * otherwise index it (which in turn recurses into its sub-folders).
	 * Records the folder name in {@link #curFolderName} for the methods below.
	 */
	private static void traverse( Folder folder)
		throws MessagingException, IOException
	{
		// check for null before touching the folder at all
		if ( folder == null)
			fatal( "Invalid folder: " + folder);

		curFolderName = folder.getFullName();

		// ignore folders that are probably not of interest - could be made customizable...
		for ( int i = 0; i < IGNORED_FOLDERS.length; i++)
			if ( curFolderName.equalsIgnoreCase( IGNORED_FOLDERS[ i]))
				return;

		if ( !folder.exists())
			fatal( "Invalid folder: " + curFolderName);

		index( folder);
	}

	/**
	 * Index this folder: first its own messages (if it can hold any),
	 * then every sub-folder (if it can hold any).
	 */
	private static void index( Folder folder)
		throws IOException ,
			   MessagingException
	{
		if ( (folder.getType() & Folder.HOLDS_MESSAGES) != 0)
		{
			// only needed while indexing messages, so only cast here
			curUF = ( UIDFolder) folder;
			folder.open(Folder.READ_ONLY);
			try
			{
				indexMessages( folder);
			}
			finally
			{
				// make sure the folder is released even if indexing failed
				if ( folder.isOpen())
					folder.close( false); // false => do not expunge
			}
		}

		// recurse if possible
		if ( (folder.getType() & Folder.HOLDS_FOLDERS) != 0)
		{
			Folder[] far = folder.list();
			if ( far != null)
				for ( int i = 0; i < far.length; i++)
					traverse( far[ i]);
		}
	}

	/**
	 * Add one document per message of an already opened folder to the
	 * index and print per-folder statistics. A message whose document is
	 * rejected by the index writer is reported, counted in
	 * {@link #badDocs} and skipped.
	 */
	private static void indexMessages( Folder folder)
		throws IOException ,
			   MessagingException
	{
		Message[] ar = folder.getMessages(); // get refs to all msgs
		if ( ar == null)
			ar = new Message[ 0]; // dummy

		folder.fetch( ar, fp); // prefetch the items listed in the fetch profile

		o.println();
		long t0 = now();
		o.println( "FOLDER: " +curFolderName +" messages=" + ar.length);

		for ( int i = 0; i < ar.length; i++) // for every msg
		{
			long t00 = now();
			long b0 = bytes;
			final Document doc = new Document();
			doc.add( Field.Keyword( F_FOLDER, curFolderName));
			index( doc, ar[ i]); // index this message
			try
			{
				writer.addDocument( doc); // add it
				nIndexed++;
			}
			catch( IOException ioe) // can be side effect of hosed up mail headers
			{
				badDocs++; // reported in the final summary
				err.println( "* OUCH BAD DOC: " +ioe);
				ioe.printStackTrace( err);
				err.println( "* message=" + ar[i]  + "/" + ar[i].getSubject());
				continue;
			}
			long t11 = now();
			long db = bytes - b0;
			if ( verbose)
			{
				o.println( "\t" + (i+1) + "/" + ar.length+ " dt=" +
						   (t11-t00) + "(ms) " + "bytes=" + db + " " + ar[i].getSubject());
			}
		}
		long t1 = now();
		long dt = t1 - t0;
		long elapsed = t1 - start;

		o.println( "Folder done in " + (dt/1000L) + "(s), rate="  +
				   kbPerSecond( bytes, elapsed) + "(kb/s), " +
				   " total data = " +
				   (bytes/1024) + "(kb), " +
				   "total time = " + (elapsed/1000L) + "(s)");
	}

	/**
	 * Index one message: subject, dates, URL, addresses, UID and flags
	 * go into their own fields, then the body is indexed.
	 */
	private static void index( final Document doc, final Message m)
		throws MessagingException,
			   IOException
	{
		final long uid = curUF.getUID( m);

		// form a URL that mozilla seems to accept. Couldn't get it to accept
		// what I thought was the standard

		final  String url = urlPrefix + curFolderName + "%3E" + uid;

		final String subject = m.getSubject();
		final Date recv = m.getReceivedDate();
		final Date sent = m.getSentDate();
		//-------------------------------------------------------
		// data gathered, now add to doc

		if ( subject != null)
			doc.add( Field.Text( F_SUBJECT, subject));

		if ( recv != null)
			doc.add( Field.Keyword( F_RECEIVED, DateField.timeToString( recv.getTime())));

		if ( sent != null)
			doc.add( Field.Keyword( F_SENT,     DateField.timeToString( sent.getTime())));

		doc.add( Field.Keyword( F_URL, url));

		Address[] addrs = m.getAllRecipients();
		if ( addrs != null)
			for ( int j = 0; j < addrs.length; j++)
				doc.add( Field.Keyword( F_TO, ""+addrs[ j]));

		addrs = m.getFrom();
		if ( addrs != null)
			for ( int j = 0; j < addrs.length; j++)
				doc.add( Field.Keyword( F_FROM, ""+addrs[ j]));
		addrs = m.getReplyTo();
		if ( addrs != null)
			for ( int j = 0; j < addrs.length; j++)
				doc.add( Field.Keyword( F_REPLY_TO, ""+addrs[ j]));

		doc.add(  Field.Keyword( F_UID, ""+uid));

		// could ignore docs that have the deleted flag set
		// FLAGS and SFLAGS are parallel arrays: flag j is stored under field name SFLAGS[j]
		for ( int j = 0; j < FLAGS.length; j++)
		{
			boolean val = m.isSet( FLAGS[ j]);
			doc.add( Field.Keyword( SFLAGS[ j],
									(val ? "true" : "false")));
		}

		// now special case for mime
		if ( m instanceof MimeMessage)
		{
			mime++;
			MimeMessage mm = (MimeMessage) m;
			index( doc, mm);

		}
		else
		{
			nmime++;

			final DataHandler dh = m.getDataHandler();

			// NOTE(review): decodes with the platform default charset
			doc.add( Field.Text( F_CONTENTS,
								 new InputStreamReader( dh.getInputStream())));
		}
	}

	/**
	 * Index a MIME message, which seems to be all of them.
	 * Stores the size (when known) and then indexes the content.
	 */
	private static void index( final Document doc, final MimeMessage mm)
		throws MessagingException,
			   IOException
	{
		final int size = mm.getSize();
		if ( size > 0) // -1 is n/a
			doc.add( Field.UnIndexed( F_SIZE, "" + size));

		indexContent( doc, mm);
	}

	/**
	 * Index a multi-part message by indexing each body part in turn.
	 */
	private static void index( final Document doc, final MimeMultipart mmp)
		throws MessagingException,
			   IOException
	{
		int n = mmp.getCount();
		for ( int i = 0; i < n; i++) // go thru all body parts
		{
			BodyPart bp = mmp.getBodyPart( i);
			// same thing ends up happening regardless, if/else left it to show structure
			if ( bp instanceof MimeBodyPart)
			{
				MimeBodyPart mbp = ( MimeBodyPart) bp;
				indexContent( doc, mbp);
			}
			else
			{
				indexContent( doc, bp);
			}
		}
	}

	/**
	 * Index a message part.
	 */
	private static void index( final Document doc, final Part p)
		throws MessagingException,
			   IOException
	{
		indexContent( doc, p);
	}

	/**
	 * Index a mime part.
	 */
	private static void index( final Document doc, final MimePart mp)
		throws MessagingException,
			   IOException
	{
		indexContent( doc, mp);
	}

	/**
	 * Index a part: record its content type and description, add its
	 * size to the running byte count, then add its decoded content to the
	 * document. Image parts are skipped; nested parts are indexed
	 * recursively into the same document.
	 */
	private static void indexContent( final Document doc, final Part p)
		throws MessagingException,
			   IOException
	{
		int size = p.getSize();
		if ( size > 0) bytes += size;
		String ct = p.getContentType();
		String cd = p.getDescription();
		Object content = null;

		if ( ct != null)
			doc.add( Field.Keyword( F_CT, ct));
		if ( cd != null)
			doc.add( Field.Keyword( F_CD, cd));

		// regionMatches is used rather than toLowerCase() so that the
		// comparison does not depend on the default locale
		if ( ct != null &&
			 ct.regionMatches( true, 0, IMAGE_PREFIX, 0, IMAGE_PREFIX.length()))
			return; // no point for now but maybe in the future we see if any forms such as jpegs have some strings

		try
		{
			content = p.getContent(); // get content object, indirectly calls into JAF which decodes based on MIME type and char set
		}
		catch( IOException ioe)
		{
			// decoding failed: fall back to indexing the part's input stream
			badAttachments++;
			err.println( "OUCH decoding attachment, p=" + p + " ioe=" + ioe);
			ioe.printStackTrace( err);
			doc.add( Field.Text( F_CONTENTS,
								 new InputStreamReader( p.getInputStream())));
			return;
		}

		if ( content instanceof MimeMultipart)
		{
			index( doc, (MimeMultipart) content);
		}
		else if ( content instanceof MimePart)
		{
			index( doc, (MimePart) content);
		}
		else if ( content instanceof Part)
		{
			index( doc, (Part) content);
		}
		else if ( content instanceof String)
		{
			doc.add( Field.Text( F_CONTENTS,
								 new StringReader( (String) content)));
		}
		else if ( content instanceof InputStream)
		{
			doc.add( Field.Text( F_CONTENTS,
								 new InputStreamReader( (InputStream) content)));
		}
		else
		{
			// could be a warning
			// content may be null here, so do not call getClass() blindly
			final String kind = ( content == null) ? "null" : content.getClass().toString();
			fatal( "***** Strange content: " + content + "/" + kind +
					   " ct=" +ct + " cd="+ cd);
		}
	}

	/**
	 * Current time in milliseconds.
	 */
	private static long now()
	{
		return System.currentTimeMillis();
	}

	/**
	 * Fatal error: print the message and exit with status 123.
	 */
	private static void fatal( String s)
	{
		err.println( "Fatal error: " + s);
		System.exit( 123);
	}
	/**
	 * Fatal error: print the message and the stack trace of the cause,
	 * then exit with status 123.
	 */
	private static void fatal( String s, Throwable t)
	{
		err.println( "Fatal error: " + s + ": " + t);
		t.printStackTrace( err);
		System.exit( 123);
	}



	// total bytes read
	private static long bytes;


	private static IndexWriter writer;

	// msg flags
	private final static Flags.Flag[] FLAGS  =new Flags.Flag[] {
		Flags.Flag.ANSWERED,
		Flags.Flag.DELETED,
		Flags.Flag.DRAFT,
		Flags.Flag.FLAGGED,
		Flags.Flag.RECENT,
		Flags.Flag.SEEN};

	// no toString() in Flags.Flag :(
	// field names for the flags above, same order
	private static final String[] SFLAGS =new String[] {
		"answered",
		"deleted",
		"draft",
		"flagged",
		"recent",
		"seen"};

	// full folder names (compared ignoring case) that are never indexed
	private static final String[] IGNORED_FOLDERS = new String[] {
		"junk",
		"trash",
		"inbox.junk",
		"inbox.trash"};

	// content types starting with this (ignoring case) are not indexed
	private static final String IMAGE_PREFIX = "image/";

	// headers to fetch
	private static final FetchProfile fp = new FetchProfile();
	static
	{
		fp.add( FetchProfile.Item.ENVELOPE); // standard headers
		fp.add( FetchProfile.Item.CONTENT_INFO);
		fp.add( UIDFolder.FetchProfileItem.UID);
		fp.add( com.sun.mail.imap.IMAPFolder.FetchProfileItem.HEADERS);
		fp.add( com.sun.mail.imap.IMAPFolder.FetchProfileItem.SIZE);
	}

	// globals so methods don't take a million args
	private static UIDFolder curUF;
	private static String curFolderName;
	private static String urlPrefix;


	private static boolean verbose = true;


	// statistics
	private static int mime;
	private static int nmime;
	private static int badAttachments;
	private static int badDocs;
	private static int nIndexed; // number of docs indexed
	private static final long start = now();

	private static final Analyzer analyzer = new StandardAnalyzer();

	// field names
	private static final String F_SIZE = "size";
	private static final String F_URL = "url";
	private static final String F_SUBJECT = "subject";
	private static final String F_SENT = "sent";
	private static final String F_FOLDER = "folder";
	private static final String F_RECEIVED = "received";
	private static final String F_CONTENTS = "contents";
	private static final String F_FROM = "from";
	private static final String F_TO = "to";
	private static final String F_UID = "uid";
	private static final String F_REPLY_TO = "reply-to";
	private static final String F_CT = "content-type";
	private static final String F_CD = "content-description";
	private static final PrintStream o = System.out;
	private static final PrintStream err = System.err;

}

