package template_sentence_generator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.atilika.kuromoji.Token;
import org.atilika.kuromoji.Tokenizer;


/**
 * Builds a frequency list of the proper nouns that occur in a sentence file.
 */
public class NounList1 {

	/** Sentence file used when the caller supplies no file name (the path this method previously always read). */
	private static final String DEFAULT_SENTENCE_FILE = "data/comentAll.txt";

	/** User dictionary handed to the Kuromoji tokenizer builder. */
	private static final String USER_DICTIONARY = "resources/userdict.txt";

	/** Marker text removed from every sentence before it is tokenized. */
	private static final String BREAK_MARKER = "「ｂれあｋ」";

	/** Part-of-speech prefix that selects proper nouns. */
	private static final String PROPER_NOUN_PREFIX = "名詞,固有名詞,";

	/**
	 * Tokenizes every sentence of the given file and counts how often each
	 * known proper noun appears.
	 *
	 * <p>A token is counted only when its part of speech starts with
	 * {@code 名詞,固有名詞,} and {@code Token.isKnown()} is true. Tokens are
	 * grouped by surface form; the part of speech stored on a {@code Noun} is
	 * the one seen at the first occurrence of that surface form.
	 *
	 * @param filename path of the sentence file to read; when {@code null} or
	 *                 empty, {@code data/comentAll.txt} is read instead
	 * @return one {@code Noun} per distinct surface form, ordered by
	 *         {@code CountComparator}
	 * @throws IOException propagated from loading the sentence list or from
	 *                     building the tokenizer with the user dictionary
	 */
	public static List<Noun> getNounList(String filename) throws IOException{

		// Honor the caller's file; keep the old hard-coded path as the fallback.
		String sourceFile = (filename == null || filename.isEmpty()) ? DEFAULT_SENTENCE_FILE : filename;

		List<String> sentences = OriginSentenceList.createOriginSentenceList(sourceFile);
		Tokenizer tokenizer = Tokenizer.builder().userDictionary(USER_DICTIONARY).build();

		// The list keeps first-appearance order (so the final sort sees the same
		// input order as before); the map gives constant-time lookup by surface form.
		List<Noun> nounlist = new ArrayList<Noun>();
		Map<String, Noun> nounBySurface = new HashMap<String, Noun>();

		for (String sentence : sentences) {
			String cleaned = sentence.replace(BREAK_MARKER, "");

			for (Token token : tokenizer.tokenize(cleaned)) {
				String partOfSpeech = token.getPartOfSpeech();
				if (!partOfSpeech.startsWith(PROPER_NOUN_PREFIX) || !token.isKnown()) {
					continue;
				}

				String surface = token.getSurfaceForm();
				Noun seen = nounBySurface.get(surface);
				if (seen != null) {
					seen.setCount(seen.getCount() + 1);
					continue;
				}

				// First occurrence of this surface form.
				Noun noun = new Noun();
				noun.setName(surface);
				noun.setPart(partOfSpeech);
				noun.setCount(1);
				nounlist.add(noun);
				nounBySurface.put(surface, noun);
			}
		}

		Collections.sort(nounlist, new CountComparator());

		return nounlist;
	}

}

