org.apache.nutch.indexer
Class IndexingJob
- java.lang.Object
- org.apache.hadoop.conf.Configured
- org.apache.nutch.indexer.IndexingJob
- All Implemented Interfaces:
- org.apache.hadoop.conf.Configurable, org.apache.hadoop.util.Tool
public class IndexingJob extends org.apache.hadoop.conf.Configured implements org.apache.hadoop.util.Tool
Generic indexer which relies on plugins implementing IndexWriter.
Field Summary
Fields Modifier and Type Field and Description static org.slf4j.Logger
LOG
Constructor Summary
Constructors Constructor and Description IndexingJob()
IndexingJob(org.apache.hadoop.conf.Configuration conf)
Method Summary
Methods Modifier and Type Method and Description void
index(org.apache.hadoop.fs.Path crawlDb,
org.apache.hadoop.fs.Path linkDb,
List<org.apache.hadoop.fs.Path> segments,
boolean noCommit)
void
index(org.apache.hadoop.fs.Path crawlDb,
org.apache.hadoop.fs.Path linkDb,
List<org.apache.hadoop.fs.Path> segments,
boolean noCommit,
boolean deleteGone)
void
index(org.apache.hadoop.fs.Path crawlDb,
org.apache.hadoop.fs.Path linkDb,
List<org.apache.hadoop.fs.Path> segments,
boolean noCommit,
boolean deleteGone,
String params)
void
index(org.apache.hadoop.fs.Path crawlDb,
org.apache.hadoop.fs.Path linkDb,
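List<org.apache.hadoop.fs.Path> segments,
boolean noCommit,
boolean deleteGone,
String params,
boolean filter,
boolean normalize)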
static void
main(String[] args)
int
run(String[] args)
-
Methods inherited from class org.apache.hadoop.conf.Configured
getConf, setConf
-
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
-
Methods inherited from interface org.apache.hadoop.conf.Configurable
getConf, setConf
Field Detail
-
LOG
public static org.slf4j.Logger LOG
Constructor Detail
-
IndexingJob
public IndexingJob()
-
IndexingJob
public IndexingJob(org.apache.hadoop.conf.Configuration conf)
Method Detail
-
index
public void index(org.apache.hadoop.fs.Path crawlDb, org.apache.hadoop.fs.Path linkDb, List<org.apache.hadoop.fs.Path> segments, boolean noCommit) throws IOException
- Throws:
- <code>IOException</code>
-
index
public void index(org.apache.hadoop.fs.Path crawlDb, org.apache.hadoop.fs.Path linkDb, List<org.apache.hadoop.fs.Path> segments, boolean noCommit, boolean deleteGone) throws IOException
- Throws:
- <code>IOException</code>
-
index
public void index(org.apache.hadoop.fs.Path crawlDb, org.apache.hadoop.fs.Path linkDb, List<org.apache.hadoop.fs.Path> segments, boolean noCommit, boolean deleteGone, String params) throws IOException
- Throws:
- <code>IOException</code>
-
index
public void index(org.apache.hadoop.fs.Path crawlDb, org.apache.hadoop.fs.Path linkDb, List<org.apache.hadoop.fs.Path> segments, boolean noCommit, boolean deleteGone, String params, boolean filter, boolean normalize) throws IOException
- Throws:
- <code>IOException</code>
-
run
public int run(String[] args) throws Exception
- Specified by:
- <code>run</code> in interface <code>org.apache.hadoop.util.Tool</code>
- Throws:
- <code>Exception</code>
-
main
public static void main(String[] args) throws Exception
- Throws:
- <code>Exception</code>