1 2 3 4 package net.nutch.indexer; 5 6 import org.apache.lucene.search.DefaultSimilarity; 7 8 9 public class NutchSimilarity extends DefaultSimilarity { 10 private static final int MIN_CONTENT_LENGTH = 1000; 11 12 13 public float lengthNorm(String fieldName, int numTokens) { 14 if ("url".equals(fieldName)) { return 1.0f / numTokens; 17 } else if ("anchor".equals(fieldName)) { return (float)(1.0/Math.log(Math.E+numTokens)); 20 } else if ("content".equals(fieldName)) { return super.lengthNorm(fieldName, Math.max(numTokens, MIN_CONTENT_LENGTH)); 23 24 } else { return super.lengthNorm(fieldName, numTokens); 26 } 27 } 28 29 public float coord(int overlap, int maxOverlap) { 30 return 1.0f; 31 } 32 33 } 34 | Popular Tags |