Mega Code Archive

 
Categories / Java / Ant
 

Nutch ant script

<?xml version="1.0" encoding="UTF-8"?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!--
 Ant build script for Nutch.  The default target is "job", which compiles
 the core and the plugins and packs them into ${final.name}.job.

 NOTE: separator comments in this file use "=" characters only; the
 character sequence of two hyphens is not allowed inside an XML comment.
-->
<project name="Nutch" default="job">

  <!-- Load all the default properties, and any the user wants      -->
  <!-- to contribute (without having to type -D or edit this file). -->
  <!-- Ant properties are immutable once set, so a file loaded      -->
  <!-- earlier wins over a file loaded later.                       -->
  <property file="${user.home}/build.properties" />
  <property file="${basedir}/build.properties" />
  <property file="${basedir}/default.properties" />
  <property name="test.junit.output.format" value="plain"/>

  <!-- the normal classpath -->
  <path id="classpath">
    <pathelement location="${build.classes}"/>
    <fileset dir="${lib.dir}">
      <include name="*.jar" />
    </fileset>
  </path>

  <!-- the unit test classpath -->
  <!-- plugins.classpath.dir is the parent directory of ${build.plugins} -->
  <dirname property="plugins.classpath.dir" file="${build.plugins}"/>
  <path id="test.classpath">
    <pathelement location="${test.build.classes}" />
    <pathelement location="${conf.dir}"/>
    <pathelement location="${test.src.dir}"/>
    <pathelement location="${plugins.classpath.dir}"/>
    <path refid="classpath"/>
    <pathelement location="${build.dir}/${final.name}.job" />
  </path>

  <!-- xmlcatalog definition for xslt task -->
  <xmlcatalog id="docDTDs">
    <dtd publicId="-//W3C//DTD XHTML 1.0 Transitional//EN"
         location="${xmlcatalog.dir}/xhtml1-transitional.dtd"/>
  </xmlcatalog>

  <!-- ====================================================== -->
  <!-- Stuff needed by all targets                            -->
  <!-- ====================================================== -->
  <target name="init">
    <mkdir dir="${build.dir}"/>
    <mkdir dir="${build.classes}"/>
    <mkdir dir="${test.build.dir}"/>
    <mkdir dir="${test.build.classes}"/>

    <!-- Back-date the *.template files, then copy each one to the same
         name without the ".template" suffix.  Presumably the old timestamp
         keeps the copy from overwriting an already existing (edited)
         configuration file - confirm before changing. -->
    <touch datetime="01/25/1971 2:00 pm">
      <fileset dir="${conf.dir}" includes="**/*.template"/>
    </touch>

    <copy todir="${conf.dir}" verbose="true">
      <fileset dir="${conf.dir}" includes="**/*.template"/>
      <mapper type="glob" from="*.template" to="*"/>
    </copy>

    <!-- unpack hadoop scripts from hadoop jar into bin directory -->
    <mkdir dir="${build.dir}/hadoop"/>
    <unjar dest="${build.dir}/hadoop">
      <fileset dir="${lib.dir}" includes="hadoop*.jar"/>
      <patternset includes="bin.tgz"/>
    </unjar>
    <untar src="${build.dir}/hadoop/bin.tgz" dest="bin" compression="gzip"/>

    <!-- fix broken library paths with spaces -->
    <replace file="bin/hadoop" token="PlatformName" value="PlatformName | sed -e 's/ /_/g'"/>
    <chmod dir="bin" perm="ugo+rx" includes="*.sh,hadoop"/>

    <!-- unpack hadoop webapp from hadoop jar into build directory -->
    <mkdir dir="${build.dir}/webapps"/>
    <unjar dest="${build.dir}">
      <fileset dir="${lib.dir}" includes="hadoop*.jar"/>
      <patternset includes="webapps/**"/>
    </unjar>
  </target>

  <!-- ====================================================== -->
  <!-- Compile the Java files                                 -->
  <!-- ====================================================== -->
  <target name="compile" depends="compile-core, compile-plugins"/>

  <target name="compile-core" depends="init">
    <javac
     encoding="${build.encoding}"
     srcdir="${src.dir}"
     includes="org/apache/nutch/**/*.java"
     destdir="${build.classes}"
     debug="${javac.debug}"
     optimize="${javac.optimize}"
     target="${javac.version}"
     source="${javac.version}"
     deprecation="${javac.deprecation}">
      <classpath refid="classpath"/>
    </javac>
  </target>

  <!-- Delegates to the "deploy" target of the build file in src/plugin;
       properties of this project are not passed down (inheritAll=false). -->
  <target name="compile-plugins">
    <ant dir="src/plugin" target="deploy" inheritAll="false"/>
  </target>

  <!-- Runs JavaCC on NutchAnalysis.jj, then normalises line endings of
       the Java sources to LF. -->
  <target name="generate-src" depends="init">
    <javacc target="${src.dir}/org/apache/nutch/analysis/NutchAnalysis.jj"
            javacchome="${javacc.home}">
    </javacc>

    <fixcrlf srcdir="${src.dir}" eol="lf" includes="**/*.java"/>
  </target>

  <target name="dynamic" depends="generate-src, compile">
  </target>

  <!-- ================================================================== -->
  <!-- Make nutch.jar                                                     -->
  <!-- ================================================================== -->
  <!--                                                                    -->
  <!-- ================================================================== -->
  <target name="jar" depends="compile-core">
    <copy file="${conf.dir}/nutch-default.xml"
          todir="${build.classes}"/>
    <copy file="${conf.dir}/nutch-site.xml"
          todir="${build.classes}"/>
    <jar jarfile="${build.dir}/${final.name}.jar"
         basedir="${build.classes}">
      <manifest>
      </manifest>
    </jar>
  </target>

  <!-- ================================================================== -->
  <!-- Make job jar                                                       -->
  <!-- ================================================================== -->
  <!--                                                                    -->
  <!-- ================================================================== -->
  <target name="job" depends="compile">
    <jar jarfile="${build.dir}/${final.name}.job">
      <!-- If the build.classes has the nutch config files because the jar
           command has run, exclude them.  The conf directory has them.
      -->
      <zipfileset dir="${build.classes}"
                  excludes="nutch-default.xml,nutch-site.xml"/>
      <zipfileset dir="${conf.dir}" excludes="*.template,hadoop*.*"/>
      <zipfileset dir="${lib.dir}" prefix="lib"
                  includes="**/*.jar" excludes="hadoop-*.jar"/>
      <zipfileset dir="${build.plugins}" prefix="plugins"/>
    </jar>
  </target>

  <!-- ================================================================== -->
  <!-- Make nutch.war                                                     -->
  <!-- ================================================================== -->
  <!--                                                                    -->
  <!-- ================================================================== -->
  <target name="war" depends="jar,compile,generate-docs">
    <!-- generate the nutch.xml (servlet context) file -->
    <xslt in="${basedir}/conf/nutch-default.xml"
          out="${build.dir}/nutch.xml"
          style="${basedir}/conf/context.xsl">
      <xmlcatalog refid="docDTDs"/>
      <outputproperty name="indent" value="yes"/>
    </xslt>

    <war destfile="${build.dir}/${final.name}.war"
         webxml="${web.src.dir}/web.xml">
      <fileset dir="${web.src.dir}/jsp"/>
      <zipfileset dir="${docs.src}" includes="include/*.html"/>
      <zipfileset dir="${build.docs}" includes="*/include/*.html"/>
      <fileset dir="${docs.dir}"/>
      <!-- third-party jars placed in WEB-INF/lib -->
      <lib dir="${lib.dir}">
        <include name="lucene*.jar"/>
        <include name="taglibs-*.jar"/>
        <include name="hadoop-*.jar"/>
        <include name="dom4j-*.jar"/>
        <include name="xerces-*.jar"/>
        <include name="tika-*.jar"/>
        <include name="apache-solr-*.jar"/>
        <include name="commons-httpclient-*.jar"/>
        <include name="commons-codec-*.jar"/>
        <include name="commons-collections-*.jar"/>
        <include name="commons-beanutils-*.jar"/>
        <include name="commons-cli-*.jar"/>
        <include name="commons-lang-*.jar"/>
        <include name="commons-logging-*.jar"/>
        <include name="log4j-*.jar"/>
      </lib>
      <!-- the nutch jar built by the "jar" target -->
      <lib dir="${build.dir}">
        <include name="${final.name}.jar"/>
      </lib>
      <classes dir="${conf.dir}" excludes="**/*.template"/>
      <classes dir="${web.src.dir}/locale"/>
      <classes file="${web.src.dir}/log4j.properties"/>
      <zipfileset prefix="WEB-INF/classes/plugins" dir="${build.plugins}"/>
      <webinf dir="${lib.dir}">
        <include name="taglibs-*.tld"/>
      </webinf>
    </war>
  </target>

  <!-- ================================================================== -->
  <!-- Compile test code                                                  -->
  <!-- ================================================================== -->
  <target name="compile-core-test" depends="compile-core">
    <javac
     encoding="${build.encoding}"
     srcdir="${test.src.dir}"
     includes="org/apache/nutch/**/*.java"
     destdir="${test.build.classes}"
     debug="${javac.debug}"
     optimize="${javac.optimize}"
     target="${javac.version}"
     source="${javac.version}"
     deprecation="${javac.deprecation}">
      <classpath refid="test.classpath"/>
    </javac>
  </target>

  <!-- ================================================================== -->
  <!-- Run code checks (PMD)                                              -->
  <!-- ================================================================== -->
  <target name="pmd" depends="compile">
    <property name="pmd.report" location="${build.dir}/pmd-report.html" />
    <taskdef name="pmd" classname="net.sourceforge.pmd.ant.PMDTask">
      <classpath>
        <fileset dir="${lib.dir}">
          <include name="pmd-ext/*.jar" />
          <include name="xerces*.jar" />
        </fileset>
      </classpath>
    </taskdef>
    <!-- The task itself does not fail on rule violations; the count is
         stored in pmd.failures and evaluated by the condition below. -->
    <pmd shortFilenames="true" failonerror="true" failOnRuleViolation="false"
         encoding="${build.encoding}" failuresPropertyName="pmd.failures">
      <ruleset>unusedcode</ruleset>
      <!--ruleset>basic</ruleset-->
      <!--ruleset>optimizations</ruleset-->
      <formatter type="html" toFile="${pmd.report}" />
      <!-- <formatter type="xml" toFile="${tempbuild}/$report_pmd.xml"/> -->
      <fileset dir="${basedir}/src">
        <include name="java/**/*.java"/>
        <include name="plugin/**/*.java"/>
        <!-- Exclude generated sources -->
        <exclude name="**/NutchAnalysis.java" />
        <exclude name="**/NutchAnalysisTokenManager.java" />
      </fileset>
    </pmd>
    <!-- pmd.stop is set only when pmd.failures exists and is not "0" -->
    <condition property="pmd.stop" value="true">
      <and>
        <isset property="pmd.failures" />
        <not>
          <equals arg1="0" arg2="${pmd.failures}" trim="true" />
        </not>
      </and>
    </condition>
    <fail if="pmd.stop">FAILURE: PMD shows ${pmd.failures} rule violations. See ${pmd.report} for details.</fail>
  </target>

  <!-- ================================================================== -->
  <!-- Run unit tests                                                     -->
  <!-- ================================================================== -->
  <target name="test" depends="test-core, test-plugins"/>

  <target name="test-core" depends="job, compile-core-test">
    <delete dir="${test.build.data}"/>
    <mkdir dir="${test.build.data}"/>

    <!--
      copy resources needed in junit tests
    -->
    <copy todir="${test.build.data}">
      <fileset dir="src/testresources" includes="**/*"/>
    </copy>

    <copy file="${test.src.dir}/nutch-site.xml"
          todir="${test.build.classes}"/>

    <copy file="${test.src.dir}/log4j.properties"
          todir="${test.build.classes}"/>

    <!-- Failures and errors do not halt the run; both set tests.failed,
         which makes the <fail> after the junit task stop the build. -->
    <junit printsummary="yes" haltonfailure="no" fork="yes" dir="${basedir}"
           errorProperty="tests.failed" failureProperty="tests.failed" maxmemory="1000m">
      <sysproperty key="test.build.data" value="${test.build.data}"/>
      <sysproperty key="test.src.dir" value="${test.src.dir}"/>
      <classpath refid="test.classpath"/>
      <formatter type="${test.junit.output.format}" />
      <!-- all Test*.java classes, unless a single test case was requested -->
      <batchtest todir="${test.build.dir}" unless="testcase">
        <fileset dir="${test.src.dir}"
                 includes="**/Test*.java" excludes="**/${test.exclude}.java" />
      </batchtest>
      <!-- only the class named by the "testcase" property -->
      <batchtest todir="${test.build.dir}" if="testcase">
        <fileset dir="${test.src.dir}" includes="**/${testcase}.java"/>
      </batchtest>
    </junit>

    <fail if="tests.failed">Tests failed!</fail>
  </target>

  <target name="test-plugins" depends="compile">
    <ant dir="src/plugin" target="test" inheritAll="false"/>
  </target>

  <target name="nightly" depends="test, tar">
  </target>

  <!-- ================================================================== -->
  <!-- Documentation                                                      -->
  <!-- ================================================================== -->
  <target name="javadoc" depends="compile">
    <mkdir dir="${build.javadoc}"/>
    <javadoc
      overview="${src.dir}/overview.html"
      destdir="${build.javadoc}"
      author="true"
      version="true"
      use="true"
      windowtitle="${Name} ${version} API"
      doctitle="${Name} ${version} API"
      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
      >
      <arg value="${javadoc.proxy.host}"/>
      <arg value="${javadoc.proxy.port}"/>

      <!-- core sources, followed by one packageset per plugin -->
      <packageset dir="${src.dir}"/>
      <packageset dir="${plugins.dir}/lib-http/src/java"/>
      <packageset dir="${plugins.dir}/lib-parsems/src/java"/>
      <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
      <packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
      <packageset dir="${plugins.dir}/ontology/src/java"/>
      <packageset dir="${plugins.dir}/protocol-file/src/java"/>
      <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
      <packageset dir="${plugins.dir}/protocol-http/src/java"/>
      <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
      <packageset dir="${plugins.dir}/parse-ext/src/java"/>
      <packageset dir="${plugins.dir}/parse-html/src/java"/>
      <packageset dir="${plugins.dir}/parse-js/src/java"/>
      <packageset dir="${plugins.dir}/parse-text/src/java"/>
      <packageset dir="${plugins.dir}/parse-pdf/src/java"/>
<!--  <packageset dir="${plugins.dir}/parse-rtf/src/java"/> plugin excluded from build due to licensing issues-->
<!--  <packageset dir="${plugins.dir}/parse-mp3/src/java"/> plugin excluded from build due to licensing issues-->
      <packageset dir="${plugins.dir}/parse-msexcel/src/java"/>
      <packageset dir="${plugins.dir}/parse-mspowerpoint/src/java"/>
      <packageset dir="${plugins.dir}/parse-msword/src/java"/>
      <packageset dir="${plugins.dir}/parse-oo/src/java"/>
      <packageset dir="${plugins.dir}/parse-rss/src/java"/>
      <packageset dir="${plugins.dir}/parse-swf/src/java"/>
      <packageset dir="${plugins.dir}/parse-zip/src/java"/>
      <packageset dir="${plugins.dir}/index-basic/src/java"/>
      <packageset dir="${plugins.dir}/index-more/src/java"/>
      <packageset dir="${plugins.dir}/query-basic/src/java"/>
      <packageset dir="${plugins.dir}/query-more/src/java"/>
      <packageset dir="${plugins.dir}/query-site/src/java"/>
      <packageset dir="${plugins.dir}/query-url/src/java"/>
      <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
      <packageset dir="${plugins.dir}/summary-basic/src/java"/>
      <packageset dir="${plugins.dir}/summary-lucene/src/java"/>
      <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
      <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
      <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
      <packageset dir="${plugins.dir}/creativecommons/src/java"/>
      <packageset dir="${plugins.dir}/languageidentifier/src/java"/>
      <packageset dir="${plugins.dir}/clustering-carrot2/src/java"/>
      <!-- the ontology plugin is already listed above; it is not repeated here -->

      <link href="${javadoc.link.java}"/>
      <link href="${javadoc.link.lucene}"/>
      <link href="${javadoc.link.hadoop}"/>

      <classpath refid="classpath"/>
      <classpath>
        <fileset dir="${plugins.dir}" >
          <include name="**/*.jar"/>
        </fileset>
      </classpath>

      <!-- package groups; the plugins.* patterns come from the properties files -->
      <group title="Core" packages="org.apache.nutch.*"/>
      <group title="Plugins API" packages="${plugins.api}"/>
      <group title="Protocol Plugins" packages="${plugins.protocol}"/>
      <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
      <group title="Scoring Plugins" packages="${plugins.scoring}"/>
      <group title="Parse Plugins" packages="${plugins.parse}"/>
      <group title="Analysis Plugins" packages="${plugins.analysis}"/>
      <group title="Indexing Filter Plugins" packages="${plugins.index}"/>
      <group title="Query Filter Plugins" packages="${plugins.query}"/>
      <group title="Summary Plugins" packages="${plugins.summary}"/>
      <group title="Clustering Plugins" packages="${plugins.clustering}"/>
      <group title="Ontology Plugins" packages="${plugins.ontology}"/>
      <group title="Misc. Plugins" packages="${plugins.misc}"/>
    </javadoc>
    <!-- Copy the plugin.dtd file to the plugin doc-files dir -->
    <copy file="${plugins.dir}/plugin.dtd"
          todir="${build.javadoc}/org/apache/nutch/plugin/doc-files"/>
  </target>

  <!-- Transforms conf/nutch-default.xml into ${docs.dir} with
       conf/nutch-conf.xsl.  Uses <xslt>, like the other transformations in
       this file, instead of its deprecated alias <style>. -->
  <target name="default-doc">
    <xslt basedir="${conf.dir}" destdir="${docs.dir}"
          includes="nutch-default.xml" style="conf/nutch-conf.xsl"/>
  </target>

  <!-- Generates the header and the pages for one locale.  Runs only when
       the doc.locale property is set; generate-docs supplies it through
       <antcall>. -->
  <target name="generate-locale" if="doc.locale">
    <echo message="Generating docs for locale=${doc.locale}"/>

    <mkdir dir="${build.docs}/${doc.locale}/include"/>
    <xslt in="${docs.src}/include/${doc.locale}/header.xml"
          out="${build.docs}/${doc.locale}/include/header.html"
          style="${docs.src}/style/nutch-header.xsl">
      <xmlcatalog refid="docDTDs"/>
    </xslt>

    <dependset>
      <srcfileset dir="${docs.src}/include/${doc.locale}" includes="*.xml"/>
      <srcfileset dir="${docs.src}/style" includes="*.xsl"/>
      <targetfileset dir="${docs.dir}/${doc.locale}" includes="*.html"/>
    </dependset>

    <copy file="${docs.src}/style/nutch-page.xsl"
          todir="${build.docs}/${doc.locale}"
          preservelastmodified="true"/>

    <xslt basedir="${docs.src}/pages/${doc.locale}"
          destdir="${docs.dir}/${doc.locale}"
          includes="*.xml"
          style="${build.docs}/${doc.locale}/nutch-page.xsl">
      <xmlcatalog refid="docDTDs"/>
    </xslt>
  </target>

  <!-- Copies the shared include files and then calls generate-locale once
       per supported locale. -->
  <target name="generate-docs" depends="init">
    <dependset>
      <srcfileset dir="${docs.src}/include" includes="*.html"/>
      <targetfileset dir="${docs.dir}" includes="**/*.html"/>
    </dependset>

    <mkdir dir="${build.docs}/include"/>
    <copy todir="${build.docs}/include">
      <fileset dir="${docs.src}/include"/>
    </copy>

    <antcall target="generate-locale">
      <param name="doc.locale" value="ca"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="de"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="en"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="es"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="fi"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="fr"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="hu"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="it"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="jp"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="ms"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="nl"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="pl"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="pt"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="sh"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="sr"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="sv"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="th"/>
    </antcall>

    <antcall target="generate-locale">
      <param name="doc.locale" value="zh"/>
    </antcall>

    <fixcrlf srcdir="${docs.dir}" eol="lf" encoding="utf-8"
             includes="**/*.html"/>
  </target>

  <!-- ================================================================== -->
  <!-- D I S T R I B U T I O N                                            -->
  <!-- ================================================================== -->
  <!--                                                                    -->
  <!-- ================================================================== -->
  <target name="package" depends="jar, job, war, javadoc">
    <mkdir dir="${dist.dir}"/>
    <mkdir dir="${dist.dir}/lib"/>
    <mkdir dir="${dist.dir}/bin"/>
    <mkdir dir="${dist.dir}/docs"/>
    <mkdir dir="${dist.dir}/docs/api"/>
    <mkdir dir="${dist.dir}/plugins"/>

    <copy todir="${dist.dir}/lib" includeEmptyDirs="false">
      <fileset dir="lib"/>
    </copy>

    <copy todir="${dist.dir}/plugins">
      <fileset dir="${build.plugins}"/>
    </copy>

    <copy todir="${dist.dir}/webapps">
      <fileset dir="${build.webapps}"/>
    </copy>

    <copy file="${build.dir}/${final.name}.jar" todir="${dist.dir}"/>
    <copy file="${build.dir}/${final.name}.job" todir="${dist.dir}"/>
    <copy file="${build.dir}/${final.name}.war" todir="${dist.dir}"/>

    <copy todir="${dist.dir}/bin">
      <fileset dir="bin"/>
    </copy>

    <copy todir="${dist.dir}/conf">
      <fileset dir="${conf.dir}" excludes="**/*.template"/>
    </copy>

    <chmod perm="ugo+x" type="file">
      <fileset dir="${dist.dir}/bin"/>
    </chmod>

    <copy todir="${dist.dir}/docs">
      <fileset dir="${docs.dir}"/>
    </copy>

    <copy todir="${dist.dir}/docs/api">
      <fileset dir="${build.javadoc}"/>
    </copy>

    <copy todir="${dist.dir}">
      <fileset dir=".">
        <include name="*.txt" />
        <include name="KEYS" />
      </fileset>
    </copy>

    <copy todir="${dist.dir}/src" includeEmptyDirs="true">
      <fileset dir="src"/>
    </copy>

    <copy todir="${dist.dir}/" file="build.xml"/>
    <copy todir="${dist.dir}/" file="default.properties"/>
  </target>

  <!-- ================================================================== -->
  <!-- Make release tarball                                               -->
  <!-- ================================================================== -->
  <target name="tar" depends="package">
    <tar compression="gzip" longfile="gnu"
         destfile="${build.dir}/${final.name}.tar.gz">
      <!-- everything except bin/* is stored with mode 664 -->
      <tarfileset dir="${build.dir}" mode="664">
        <exclude name="${final.name}/bin/*" />
        <include name="${final.name}/**" />
      </tarfileset>
      <!-- the files in bin/ are stored with mode 755 -->
      <tarfileset dir="${build.dir}" mode="755">
        <include name="${final.name}/bin/*" />
      </tarfileset>
    </tar>
  </target>

  <!-- ================================================================== -->
  <!-- Clean.  Delete the build files, and their directories              -->
  <!-- ================================================================== -->
  <target name="clean">
    <delete dir="${build.dir}"/>
  </target>

  <!-- ================================================================== -->
  <!-- RAT targets                                                        -->
  <!-- ================================================================== -->
  <!-- Loads the RAT antlib from any rat*.jar found in the project root. -->
  <target name="rat-sources-typedef">
    <typedef resource="org/apache/rat/anttasks/antlib.xml" >
      <classpath>
        <fileset dir="." includes="rat*.jar"/>
      </classpath>
    </typedef>
  </target>

  <target name="rat-sources" depends="rat-sources-typedef"
          description="runs the tasks over src/java">
    <rat:report xmlns:rat="antlib:org.apache.rat.anttasks">
      <fileset dir="src">
        <include name="java/**/*"/>
        <include name="plugin/**/src/**/*"/>
      </fileset>
    </rat:report>
  </target>

</project>

File: default.properties

Name=Nutch
name=nutch
version=1.0
final.name=${name}-${version}
year=2006

basedir = ./
src.dir = ./src/java
lib.dir = ./lib
conf.dir = ./conf
plugins.dir = ./src/plugin
docs.dir = ./docs
docs.src = ${basedir}/src/web

xmlcatalog.dir = ${basedir}/src/xmlcatalog

build.dir = ./build
build.classes = ${build.dir}/classes
build.webapps = ${build.dir}/webapps
build.plugins = ${build.dir}/plugins
build.docs = ${build.dir}/docs
build.javadoc = ${build.docs}/api
build.encoding = UTF-8

test.src.dir = ./src/test
test.build.dir = ${build.dir}/test
test.build.data =  ${test.build.dir}/data
test.build.classes = ${test.build.dir}/classes
test.build.javadoc = ${test.build.dir}/docs/api

javacc.home=/usr/java/javacc

web.src.dir = ./src/web
src.webapps = ./src/webapps

# Proxy Host and Port to use for building JavaDoc
javadoc.proxy.host=-J-DproxyHost=
javadoc.proxy.port=-J-DproxyPort=
javadoc.link.java=http://java.sun.com/j2se/1.4.2/docs/api/
javadoc.link.lucene=http://jakarta.apache.org/lucene/docs/api/
javadoc.link.hadoop=http://lucene.apache.org/hadoop/docs/api/
javadoc.packages=org.apache.nutch.*

dist.dir=${build.dir}/${final.name}

javac.debug=on
javac.optimize=on
javac.deprecation=off
javac.version= 1.5

#
# Plugins API
#
plugins.api=\
   org.apache.nutch.protocol.http.api*:\
   org.apache.nutch.urlfilter.api*:\
   org.apache.nutch.parse.ms*

#
# Protocol Plugins
#
plugins.protocol=\
   org.apache.nutch.protocol.file*:\
   org.apache.nutch.protocol.ftp*:\
   org.apache.nutch.protocol.http*:\
   org.apache.nutch.protocol.httpclient*

#
# URL Filter Plugins
#
plugins.urlfilter=\
   org.apache.nutch.urlfilter.automaton*:\
   org.apache.nutch.urlfilter.prefix*:\
   org.apache.nutch.urlfilter.regex*

#
# Scoring Plugins
#
plugins.scoring=\
   org.apache.nutch.scoring.opic*

#
# Parse Plugins
#
plugins.parse=\
   org.apache.nutch.parse.ext*:\
   org.apache.nutch.parse.html*:\
   org.apache.nutch.parse.js:\
   org.apache.nutch.parse.mp3*:\
   org.apache.nutch.parse.msexcel*:\
   org.apache.nutch.parse.mspowerpoint*:\
   org.apache.nutch.parse.msword*:\
   org.apache.nutch.parse.oo*:\
   org.apache.nutch.parse.pdf*:\
   org.apache.nutch.parse.rtf*:\
   org.apache.nutch.parse.rss*:\
   org.apache.nutch.parse.swf*:\
   org.apache.nutch.parse.text:\
   org.apache.nutch.parse.zip

#
# Analysis Plugins
#
# NOTE(review): the two lines after "plugins.analysis=\" start with "#" but
# follow a line continuation, so a properties loader reads them as part of
# the value rather than as comments. The value is kept unchanged here;
# confirm whether an empty value was intended.
plugins.analysis=\
#  ${plugin.analysis-de}:\
#  ${plugin.analysis-fr}

#
# Indexing Filter Plugins
#
plugins.index=\
   org.apache.nutch.indexer.basic*:\
   org.apache.nutch.indexer.more*

#
# Query Filter Plugins
#
plugins.query=\
   org.apache.nutch.searcher.basic*:\
   org.apache.nutch.searcher.more*:\
   org.apache.nutch.searcher.site*:\
   org.apache.nutch.searcher.url*

#
# Ontology Plugins
#
plugins.ontology=\
   org.apache.nutch.ontology.jena*

#
# Online Clusterer Plugins
#
plugins.clustering=\
   org.apache.nutch.clustering.carrot2*

#
# Summary Plugins
#
plugins.summary=\
   org.apache.nutch.summary.basic*:\
   org.apache.nutch.summary.lucene*

#
# Misc. Plugins
#
# (gathers plugins that cannot be dispatched
# in any category, mainly because they contains
# many extension points)
#
plugins.misc=\
   org.apache.nutch.analysis.lang*:\
   org.apache.nutch.microformats.reltag*:\
   org.creativecommons.nutch*