Skip to content

Commit

Permalink
Merge pull request #253 from Microsoft/aleRewardTime
Browse files Browse the repository at this point in the history
Circumnavigated costly xml wrangling
  • Loading branch information
DaveyBiggers authored Aug 3, 2016
2 parents 9b10236 + a94c4df commit 63fbaad
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 14 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ project( Malmo )

set( MALMO_VERSION_MAJOR 0)
set( MALMO_VERSION_MINOR 16)
set( MALMO_VERSION_REVISION 0)
set( MALMO_VERSION_REVISION 1)
set( MALMO_VERSION ${MALMO_VERSION_MAJOR}.${MALMO_VERSION_MINOR}.${MALMO_VERSION_REVISION} )
# N.B. Check that this version number matches the one in Minecraft/src/main/java/com/microsoft/Malmo/MalmoMod.java

Expand Down
20 changes: 9 additions & 11 deletions Malmo/src/ALEAgentHost.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ namespace malmo

this->ale_interface = boost::make_shared<ALEInterface>(role != 0);
this->ale_interface->setInt("random_seed", 123);
this->ale_interface->setFloat("repeat_action_probability", 0); // Default is 0.25. We really don't want this!
this->ale_interface->loadROM(unique_experiment_id);

if (this->video_frame_writer)
Expand Down Expand Up @@ -186,34 +187,34 @@ namespace malmo
this->world_state.number_of_video_frames_since_last_state++;
}

void ALEAgentHost::onReward(TimestampedReward reward)
void ALEAgentHost::onReward(boost::posix_time::ptime ts, float reward)
{
boost::lock_guard<boost::mutex> scope_guard(this->world_state_mutex);

if (this->reward_stream && this->reward_stream.is_open())
{
this->reward_stream << boost::posix_time::to_iso_string(reward.timestamp) << " " << reward.getAsXML(false) << std::endl;
this->reward_stream << boost::posix_time::to_iso_string(ts) << " " << "<Reward xmlns=\"http://ProjectMalmo.microsoft.com\"><Value dimension=\"0\" value=\"" << reward << "\" /</Reward>" << std::endl;
}

TimestampedReward tsr(reward);
switch( this->rewards_policy )
{
case AgentHost::RewardsPolicy::LATEST_REWARD_ONLY:
this->world_state.rewards.clear();
this->world_state.rewards.push_back( boost::make_shared<TimestampedReward>( reward ) );
this->world_state.rewards.push_back( boost::make_shared<TimestampedReward>( tsr ) );
break;
case AgentHost::RewardsPolicy::SUM_REWARDS:
if( !this->world_state.rewards.empty() ) {
reward.add(*this->world_state.rewards.front());
tsr.add(*this->world_state.rewards.front());
this->world_state.rewards.clear();
}
this->world_state.rewards.push_back( boost::make_shared<TimestampedReward>( reward ) );
this->world_state.rewards.push_back( boost::make_shared<TimestampedReward>( tsr ) );
// (timestamp is that of latest reward, even if zero)
break;
case AgentHost::RewardsPolicy::KEEP_ALL_REWARDS:
this->world_state.rewards.push_back( boost::make_shared<TimestampedReward>( reward ) );
this->world_state.rewards.push_back( boost::make_shared<TimestampedReward>( tsr ) );
break;
}

this->world_state.number_of_rewards_since_last_state++;
}

Expand Down Expand Up @@ -263,10 +264,7 @@ namespace malmo
std::string timestamp = boost::posix_time::to_iso_string(ts);
this->commands_stream << timestamp << " " << command << std::endl;
float reward = this->ale_interface->act(a);
std::ostringstream reward_xml;
reward_xml << "<Reward xmlns=\"http://ProjectMalmo.microsoft.com\"><Value dimension=\"0\" value=\""
<< reward << "\" /></Reward>";
onReward( TimestampedReward( ts, reward_xml.str() ) );
onReward( ts, reward );

// Get the video frame:
const ALEScreen& screen = this->ale_interface->getScreen();
Expand Down
2 changes: 1 addition & 1 deletion Malmo/src/ALEAgentHost.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ namespace malmo
void initialize(const MissionSpec& mission, const MissionRecordSpec& mission_record, int role, std::string unique_experiment_id);

void onVideo(TimestampedVideoFrame message);
void onReward(TimestampedReward message);
void onReward(boost::posix_time::ptime ts, float reward);
void onObservation(TimestampedString message);

AgentHost::VideoPolicy video_policy;
Expand Down
5 changes: 5 additions & 0 deletions Malmo/src/TimestampedReward.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@

namespace malmo
{
// Builds a reward holding a single value, stored under dimension 0.
// NOTE(review): this constructor does not assign a timestamp; callers that
// need one have to set it on the object themselves.
TimestampedReward::TimestampedReward(float reward)
{
    const double value = reward; // widen float -> double before storing
    this->values[0] = value;
}

TimestampedReward::TimestampedReward(boost::posix_time::ptime timestamp,std::string xml_string)
{
const bool validate = true;
Expand Down
2 changes: 2 additions & 0 deletions Malmo/src/TimestampedReward.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ namespace malmo
class TimestampedReward
{
public:
//! Constructs from a single reward float (assumes default dimension of 0)
TimestampedReward(float reward);

//! Constructs from an XML string.
TimestampedReward(boost::posix_time::ptime timestamp,std::string xml_string);
Expand Down
2 changes: 1 addition & 1 deletion Minecraft/src/main/java/com/microsoft/Malmo/MalmoMod.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
public class MalmoMod
{
public static final String MODID = "malmomod";
public static final String VERSION = "0.16.0"; // N.B. Check that this version number matches the one in the root CMakeLists.txt and the Schemas.
public static final String VERSION = "0.16.1"; // N.B. Check that this version number matches the one in the root CMakeLists.txt and the Schemas.
public static final String SOCKET_CONFIGS = "malmoports";
public static final String DIAGNOSTIC_CONFIGS = "malmodiags";
public static final String AUTHENTICATION_CONFIGS = "malmologins";
Expand Down

0 comments on commit 63fbaad

Please sign in to comment.