Wednesday, August 17, 2011

Morgan Stanley Interview Question


Suppose you have a large file with lots of words. How would you find the unique words and their count?
What kind of data structure u will use? What will be the time complexity and space complexity?

/* Algorithm to count words in a file. Please dont reply opinions on the code, comments on complexity analysis is appreciated
For each word check if the word exists in the hash, if it doesnt then insert the word with a count 1
if it does then increment the value
For each word in the hashmap, print word and its count
Complexity: Linear insertion and space
NOTE:The code only compiles with newer C++ use the switch -std=c++0x in your Makefile*/
#include <iostream>
#include <vector>
#include <unordered_map>
typedef std::unordered_map<std::string, int> MyHashMap;
void printMyHashMap(MyHashMap::const_iterator itr,MyHashMap::const_iterator enditr){
while(itr!=enditr){
std::cout<<(*itr).first<<" : "<<(*itr).second<<std::endl;
++itr;
}
}
int main(){
MyHashMap wordsMap; //declare unordered map
//vector of strings
std::vector<std::string> strings {"count","words","in","a","file",
"and","print","words","with","their","count",
".","more","words","file"};
int len = strings.size();
std::vector<std::string>::const_iterator wordsItr;
wordsItr = strings.begin();
MyHashMap::iterator mapItr = wordsMap.begin();
for ( wordsItr=strings.begin(); wordsItr!=strings.end(); wordsItr++ ){
mapItr = wordsMap.find(*wordsItr);
if(mapItr==wordsMap.end()){ //if word not found then
wordsMap.insert(MyHashMap::value_type(*wordsItr, 1));
}else{
(*mapItr).second = ++(*mapItr).second; }
}
std::cout<<"words and their count :"<<std::endl;
printMyHashMap(wordsMap.begin(),wordsMap.end());
return (0);
}



///2
#include <iostream>
#include <algorithm>
#include <vector>
#include <fstream>
#include <map>
#include <iterator>
using namespace std;
int main() {
string str;
ifstream in("input.txt");
map<string, int> m;
if (in.is_open()) {
while(in >> str) {
m[str]++;
}
}
for(map<string, int>::iterator it = m.begin(); it != m.end(); it++) {
cout << (*it).first <<"\t" << (*it).second << endl;
}
}
input.txt
abc abc abc abc abc abc abc abcdef abc abcd
abc abc abc abc abc abc abc abcdef abc abcd
abc abc abc abc abc abc abc abcdef abc abcd
abc abc abc abc abc abc abc abcdef abc abcd
count words in a file and print words with their count more words file
output
ajay@ubuntu:~/workspace$ ./a.out
a 1
abc 32
abcd 4
abcdef 4
and 1
count 2
file 2
in 1
more 1
print 1
their 1
with 1
words 3


//Java method


import java.util.HashMap;
public class TestTest {
public static void main(String[] args){
String inputString = "abc abc abc abc abc abc abc abcdef abc abcd abc abc abc abc abc abc abc abcdef abc abcd" +
" abc abc abc abc abc abc abc abcdef abc abcd abc abc abc abc abc abc abc abcdef abc abcd" +
" count words in a file and print words with their count more words file";
HashMap<Object, Integer> myMap = new HashMap<Object, Integer>(){
@Override
public Integer put(Object arg0, Integer arg1) {
Integer i = super.get(arg0);
arg1 = i==null?1:i+1;
return super.put(arg0, arg1);
}
};

String[] splits = inputString.split(" ");
for(String s : splits){
myMap.put(s,1);
}

System.out.println(myMap);
}
}


Source: Here


No comments:

Post a Comment