1 2 3 4 package net.nutch.parse.mp3; 5 6 7 import net.nutch.parse.*; 8 import net.nutch.protocol.Content; 9 import org.farng.mp3.MP3File; 10 import org.farng.mp3.TagException; 11 import org.farng.mp3.id3.AbstractID3v2; 12 import org.farng.mp3.id3.AbstractID3v2Frame; 13 import org.farng.mp3.id3.ID3v1; 14 import org.farng.mp3.object.AbstractMP3Object; 15 16 import java.io.File ; 17 import java.io.FileOutputStream ; 18 import java.io.IOException ; 19 import java.net.MalformedURLException ; 20 import java.util.Iterator ; 21 22 26 27 public class MP3Parser implements Parser { 28 29 private MetadataCollector metadataCollector = new MetadataCollector(); 30 31 public Parse getParse(Content content) throws ParseException { 32 Parse parse = null; 33 metadataCollector.putAll(content.getMetadata()); 34 35 byte[] raw = content.getContent(); 36 37 File tmp = null; 38 try { 39 tmp = File.createTempFile("nutch", ".mp3"); 40 FileOutputStream fos = new FileOutputStream (tmp); 41 fos.write(raw); 42 fos.close(); 43 MP3File mp3 = new MP3File(tmp); 44 45 if (mp3.hasID3v2Tag()) { 46 parse = getID3v2Parse(mp3); 47 } else if (mp3.hasID3v1Tag()) { 48 parse = getID3v1Parse(mp3); 49 } else { 50 throw new ParseException("No textual content available"); 51 } 52 53 54 } catch (IOException e) { 55 throw new ParseException("Couldn't create temporary file", e); 56 } catch (TagException e) { 57 throw new ParseException("ID3 Tags could not be parsed", e); 58 } finally{ 59 tmp.delete(); 60 } 61 return parse; 62 } 63 64 private Parse getID3v1Parse(MP3File mp3) throws MalformedURLException { 65 ID3v1 tag = mp3.getID3v1Tag(); 66 metadataCollector.notifyProperty("TALB-Text", tag.getAlbum()); 67 metadataCollector.notifyProperty("TPE1-Text", tag.getArtist()); 68 metadataCollector.notifyProperty("COMM-Text", tag.getComment()); 69 metadataCollector.notifyProperty("TCON-Text", "(" + tag.getGenre() + ")"); 70 metadataCollector.notifyProperty("TIT2-Text", tag.getTitle()); 71 metadataCollector.notifyProperty("TYER-Text", tag.getYear()); 72 ParseData parseData = new ParseData(metadataCollector.getTitle(), 73 metadataCollector.getOutlinks(), 74 metadataCollector.getData()); 75 return new ParseImpl(metadataCollector.getText(), parseData); 76 } 77 78 public Parse getID3v2Parse(MP3File mp3) throws IOException { 79 AbstractID3v2 tag = mp3.getID3v2Tag(); 80 Iterator it = tag.iterator(); 81 while (it.hasNext()) { 82 AbstractID3v2Frame frame = (AbstractID3v2Frame) it.next(); 83 String name = frame.getIdentifier().trim(); 84 if (!name.equals("APIC")) { 85 Iterator itBody = frame.getBody().iterator(); 86 while (itBody.hasNext()) { 87 AbstractMP3Object mp3Obj = (AbstractMP3Object) itBody.next(); 88 String bodyName = mp3Obj.getIdentifier(); 89 if (!bodyName.equals("Picture data")) { 90 String bodyValue = mp3Obj.getValue().toString(); 91 metadataCollector.notifyProperty(name + "-" + bodyName, bodyValue); 92 } 93 } 94 } 95 } 96 ParseData parseData = new ParseData(metadataCollector.getTitle(), 97 metadataCollector.getOutlinks(), 98 metadataCollector.getData()); 99 return new ParseImpl(metadataCollector.getText(), parseData); 100 } 101 102 103 } 104 | Popular Tags |