Skip to content

Commit

Permalink
added "smart" search
Browse files Browse the repository at this point in the history
  • Loading branch information
kuriho committed Jul 26, 2015
1 parent ba27f30 commit 87bcc69
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 43 deletions.
3 changes: 2 additions & 1 deletion Bakeneko/Bakeneko.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>true</LinkIncremental>
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
Expand All @@ -72,6 +72,7 @@
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<StringPooling>true</StringPooling>
<DebugInformationFormat>None</DebugInformationFormat>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
Expand Down
36 changes: 23 additions & 13 deletions Bakeneko/bakeneko/api/jisho.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,29 @@ std::string JishoData::toString() {
return dataString;
}

JishoData JishoAPI::lookUp(std::string const& word) {
JSONBlob json = m_json.parse(m_http.get(word));

//@TODO: JSON 1:n data
JishoData data;
data.word = json.get(1, (std::string)"japanese", (std::string)"word");
data.fields.push_back( json.get(1, (std::string)"japanese", (std::string)"reading") );
data.fields.push_back( utf::fromUTF32To8( jp::addRuby( utf::fromUTF8to32(data.word), utf::fromUTF8to32(data.fields[0]) ) ) );
data.fields.push_back( json.get(1, (std::string)"parts_of_speech") );
data.fields.push_back( json.get(1, (std::string)"english_definitions") );
//@TODO: additional fields...

return data;
std::vector<Data> JishoAPI::lookUp(std::string const& word) {
std::vector<Data> result;
std::vector<int> relevant = { 1 };

JSONBlob json = m_json.parse(m_http.get(word));

//@TODO: Let user decide what fields + which results to grab
if (json.m_commonIdx.size() != 0)
relevant = json.m_commonIdx;

for (int i : relevant) {
JishoData jishoData;
jishoData.word = json.get(i, API_KEY_JP, API_ELEM_WORD, AccessMode::First);
jishoData.fields.push_back(json.get(i, API_KEY_JP, API_ELEM_READ, AccessMode::First));
jishoData.fields.push_back(utf::fromUTF32To8(jp::addRuby(utf::fromUTF8to32(jishoData.word), utf::fromUTF8to32(jishoData.fields[0]))));
jishoData.fields.push_back(json.get(i, API_KEY_POS, AccessMode::First));
jishoData.fields.push_back(json.get(i, API_KEY_ENG));
//@TODO: additional fields...

result.push_back(jishoData.toData());
}

return result;
}

}; // namespace bakeneko
14 changes: 10 additions & 4 deletions Bakeneko/bakeneko/api/jisho.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,18 @@
#include "json.h"
#include "../data/data.h"

const std::string API_HOSTNAME = "jisho.org";
const std::string API_PATH = "/api/v1/search/words?keyword=";
const std::string API_HOSTNAME = "jisho.org";
const std::string API_PATH = "/api/v1/search/words?keyword=";

const std::string API_KEY_JP = "japanese";
const std::string API_ELEM_WORD = "word";
const std::string API_ELEM_READ = "reading";
const std::string API_KEY_POS = "parts_of_speech";
const std::string API_KEY_ENG = "english_definitions";

namespace bakeneko {

class JishoData {
class JishoData {
public:
std::string word = "";
std::vector<std::string> fields;
Expand All @@ -47,7 +53,7 @@ class JishoAPI {
JishoAPI(){ m_http.init(API_HOSTNAME, API_PATH); };
~JishoAPI() { };

JishoData lookUp(std::string const& word);
std::vector<Data> lookUp(std::string const& word);

private:
JSONParserLite m_json;
Expand Down
81 changes: 63 additions & 18 deletions Bakeneko/bakeneko/api/json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,39 +20,81 @@

#include "json.h"
#include <algorithm>
#include <sstream>
#include <regex>
#include "..\util\unicode.h"

namespace bakeneko{
// Stores `value` under `key` for result number `index`.
// Double quotes, tabs, line feeds and carriage returns are stripped from the
// value before it is appended to the list kept for that key.
void JSONBlob::add(int index, std::string const& key, std::string value) {
    auto const isStripped = [](char c) {
        return c == '"' || c == '\t' || c == '\n' || c == '\r';
    };

    // single pass instead of one erase/remove per character; same result
    value.erase(std::remove_if(value.begin(), value.end(), isStripped), value.end());

    std::vector<std::string>& bucket = m_data[index][key];
    bucket.push_back(value);
}

std::string JSONBlob::get(int pos, std::string const& key, std::string const& elem) {
// Looks up sub-element `elem` inside the first value stored under `key` for
// result `pos`. `mode` and `item` are forwarded to findElem to choose which
// occurrence of `elem` is wanted (`item` is a 1-based number, used with Exact).
// Returns "" when `pos` or `key` is not present, or when Exact mode is
// requested without an item number.
std::string JSONBlob::get(int pos, std::string const& key, std::string const& elem, AccessMode mode, int item) {
// item == NULL compares against 0, i.e. "no item number given"
if (mode == AccessMode::Exact && item == NULL) return ""; //usage error

if (m_data.find(pos) != m_data.end()) {
if (m_data.at(pos).find(key) != m_data.at(pos).end()) {
// NOTE(review): this text looks like a diff view - the two-argument call
// appears to be the pre-commit line and the four-argument call its replacement.
// NOTE(review): findElem (shown further down) re-runs regex_search on the same,
// unchanged string each iteration; confirm it terminates when item > 1.
return findElem(m_data.at(pos).at(key)[0], elem);
return findElem(m_data.at(pos).at(key)[0], elem, mode, item);
}
}
// unknown result index or key
return "";
}

std::string JSONBlob::get(int pos, std::string const& key) {
// Returns the value(s) stored under `key` for result `pos`, selected by `mode`:
//   First - the first stored value,
//   Exact - the `item`-th stored value (1-based), or "" if fewer values exist,
//   All   - a single value as-is, or all values joined with the `br` separator.
// Returns "" when `pos` or `key` is not present, or when Exact mode is
// requested without an item number.
std::string JSONBlob::get(int pos, std::string const& key, AccessMode mode, int item) {
// item == NULL compares against 0, i.e. "no item number given"
if (mode == AccessMode::Exact && item == NULL) return ""; //usage error

if (m_data.find(pos) != m_data.end()) {
if (m_data.at(pos).find(key) != m_data.at(pos).end()) {
// NOTE(review): this text looks like a diff view - the bare return below appears
// to be the pre-commit body that the switch replaces.
return m_data.at(pos).at(key)[0];
switch (mode) {
case AccessMode::First:
// First is handled as Exact with item number 1
item = 1;
//intentional fallthrough

case AccessMode::Exact:
// item is 1-based; a number past the end of the list yields ""
if (item > m_data.at(pos).at(key).size()) {
return "";
} else {
return m_data.at(pos).at(key)[item - 1];
}

case AccessMode::All:
if (m_data.at(pos).at(key).size() == 1) {
// single value: returned unchanged, no separator
return m_data.at(pos).at(key)[0];
} else {
// separator is L"<br>" passed through utf::fromWidetoUTF8, built once
static std::string br = utf::fromWidetoUTF8(L"<br>");
std::ostringstream os;

// write every value, with the separator between values but not after the last
for (int i = 0; i < m_data.at(pos).at(key).size(); ++i) {
os << m_data.at(pos).at(key)[i];
if (i != m_data.at(pos).at(key).size() - 1)
os << br;
}
return os.str();
}
}
}
}
// unknown result index or key
return "";
}

std::string JSONBlob::findElem(std::string jsonStr, std::string elem) {
if (elem == "") return jsonStr;
std::string JSONBlob::findElem(std::string jsonStr, std::string elem, AccessMode mode, int item) {
if (elem == "") return jsonStr;
if (mode == AccessMode::All) return ""; //not required right now
if (mode == AccessMode::Exact && item == NULL) return ""; //usage error
if (mode == AccessMode::First) item = 1;

std::regex rgx(elem + ":([^,]+),?");
std::smatch match;
int counter = 0;

while (std::regex_search(jsonStr, match, rgx)) {
return match[1];
counter++;
if (item == counter) return match[1];
}

return "";
Expand All @@ -63,41 +105,44 @@ namespace bakeneko{
const std::string is_common = "{\"is_common\":";
const std::regex rgx("\"(\\w+)\":\\[([^\\[\\]]+)\\]");

int begin = 0;
int end = 0;
int dataCounter = 0;
int begin = 0;
int end = 0;
int dataCounter = 0;
std::string common = "";

JSONBlob blob;

while (1) {
for (;;) {
std::string data = "";
std::smatch match;

if (end > begin) begin = end;

if (begin == 0) {
//find first word
//find first result
begin = json.find(is_common, begin);
if (begin == std::string::npos) break;
}

//find next word
//find next result
end = json.find(is_common, begin + is_common.length());
if (end == std::string::npos) end = json.length();
if (end <= begin) break; //EOS

//only extract common words for now
if (json.substr(begin+is_common.length(),4) != "true") continue;
//optimistically increment dataCounter
dataCounter++;

//index common results
if (json.substr(begin + is_common.length(), 4) == "true") {
blob.m_commonIdx.push_back(dataCounter);
}

data.assign(json, begin, end - begin);

//flatten JSON
data.erase(std::remove(data.begin(), data.end(), '{'), data.end());
data.erase(std::remove(data.begin(), data.end(), '}'), data.end());

//optimistically increment dataCounter
dataCounter++;

//add data from non-empty JSON arrays to blob
while(std::regex_search(data, match, rgx)) {
blob.add(dataCounter, match[1], match[2]);
Expand Down
9 changes: 6 additions & 3 deletions Bakeneko/bakeneko/api/json.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,21 @@
#include <map>

namespace bakeneko {
// Selects which of several stored values a JSONBlob lookup returns:
//   First - the first one,
//   Exact - the one numbered by the accompanying 1-based `item` argument,
//   All   - every value (key-level lookups only; findElem returns "" for it).
enum AccessMode { First, Exact, All };

// Flattened view of a parsed JSON response: for each result index it keeps a
// map from key to the list of values added for that key.
class JSONBlob {
public:
// result index -> key -> values, filled through add()
std::map< int, std::map< std::string, std::vector<std::string> > > m_data;
// result indices the parser found flagged with "is_common": true
std::vector<int> m_commonIdx;

JSONBlob() { };

// Appends `value` (with quotes, tabs and line breaks stripped) to m_data[index][key].
void add(int index, std::string const& key, std::string value);
// NOTE(review): this text looks like a diff view - the two get() declarations
// without AccessMode appear to be the pre-commit versions of the two after them.
std::string get(int pos, std::string const& key, std::string const& elem);
std::string get(int pos, std::string const& key);
// Sub-element lookup inside the first value of `key`; `item` is only used with AccessMode::Exact.
std::string get(int pos, std::string const& key, std::string const& elem, AccessMode mode = AccessMode::All, int item = NULL);
// Value lookup for `key`; by default (AccessMode::All) all values are returned joined together.
std::string get(int pos, std::string const& key, AccessMode mode = AccessMode::All, int item = NULL);

private:
// Regex-based search for `elem` inside one flattened value string.
// NOTE(review): as above, the two-argument declaration appears to be the pre-commit one.
std::string findElem(std::string jsonStr, std::string elem);
std::string findElem(std::string jsonStr, std::string elem, AccessMode mode, int item = NULL);
};

class JSONParserLite {
Expand Down
8 changes: 5 additions & 3 deletions Bakeneko/bakeneko/bakeneko.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ int Bakeneko::run(HINSTANCE hInstance) {
void Bakeneko::fetch(std::wstring word) {
try {

if (m_data.add( m_api.lookUp(utf::fromWidetoUTF8(word)).toData() ))
//@TODO: sanitize input
if ( m_data.add( m_api.lookUp(utf::fromWidetoUTF8(word)) ) )
m_taskbar.honk(word);

} catch (...) { /* obligatory note to handle exceptions later */ }
Expand Down Expand Up @@ -104,7 +105,8 @@ LRESULT Bakeneko::_WindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam
HGLOBAL hClipboard = GetClipboardData(CF_UNICODETEXT);
WCHAR* clipWBuffer = (WCHAR*)GlobalLock(hClipboard);

m_basket.push((std::wstring)clipWBuffer);
if(clipWBuffer != nullptr)
m_basket.push((std::wstring)clipWBuffer);

GlobalUnlock(hClipboard);
CloseClipboard();
Expand Down Expand Up @@ -151,7 +153,7 @@ bool Bakeneko::saveFileDialog() {
GetSaveFileName(&ofn);
m_filepath = file;

//@TODO: somehow free up memory consumed by windows shell objects -> open it in a new thread?
//@TODO: Somehow free up memory consumed by windows shell objects.
return (m_filepath != L"");
}

Expand Down
9 changes: 8 additions & 1 deletion Bakeneko/bakeneko/data/data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,17 @@ namespace bakeneko {
m_map_data.push_back(newData);
return true;
};

return false;
};

// Adds every entry of `newDataList` through the single-entry add().
// All entries are attempted; the result is true when at least one of them
// was added and false otherwise (including for an empty list).
BOOL DataMap::add(std::vector<Data> const& newDataList) {
    bool anyAdded = false;

    for (std::vector<Data>::const_iterator it = newDataList.begin(); it != newDataList.end(); ++it) {
        // add() is on the left so it always runs, even after a success
        anyAdded = add(*it) || anyAdded;
    }

    return anyAdded;
}

void DataMap::remove(Data const& data) {
if (exists(data)) {
m_map_data.erase(std::find(m_map_data.begin(), m_map_data.end(), data));
Expand Down
1 change: 1 addition & 0 deletions Bakeneko/bakeneko/data/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ namespace bakeneko {
~DataMap() { };

BOOL add(Data const& newData);
BOOL add(std::vector<Data> const& newDataList);
void remove(Data const& data);
BOOL exists(Data const& data);
BOOL exists(std::string const& data);
Expand Down

0 comments on commit 87bcc69

Please sign in to comment.