#!/usr/bin/env python
import numpy as np
from sklearn.metrics import mean_squared_error
import yaml
from tqdm import tqdm
import cma
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings( action="ignore", module="sklearn", category=ConvergenceWarning )

class Linear_regression :

    def __init__( self, L1=0, L2=0 ) :
        self._L1 = L1
        self._L2 = L2
        if L1 == 0 and L2 == 0 :
            from sklearn.linear_model import LinearRegression
            self.model = LinearRegression()
        else :
            from sklearn.linear_model import ElasticNet
            # Map the L1/L2 coefficients to scikit-learn's parametrization, where alpha is the
            # overall regularization strength and l1_ratio the share of L1 in it (l1_ratio is
            # keyword-only in recent scikit-learn versions):
            self.model = ElasticNet( alpha=L1 + L2, l1_ratio=L1/( L1 + L2 ) )

    def fit( self, X, y ) :
        self.model.fit( X, y )

    def predict( self, X ) :
        return self.model.predict( X )

    def get_params( self ) :
        return np.append( self.model.coef_, self.model.intercept_ ).tolist()

    def set_params( self, params ) :
        self.model.coef_ = np.array( params[:-1] )
        self.model.intercept_ = params[-1]
        # Mark the model as fitted so that scikit-learn accepts predict() calls:
        self.model.n_iter_ = 0

    def __str__( self ) :
        return 'Linear regression%s' % ( ' with L1 and L2 regularization coefficients of %g and %g'
               % ( self._L1, self._L2 ) if self._L1 != 0 or self._L2 != 0 else '' )

class Polynomial_regression( Linear_regression ) :

    def __init__( self, degree=2, L1=0, L2=0, interaction_only=False ) :
        self._degree = degree
        self._interaction_only = interaction_only
        from sklearn.preprocessing import PolynomialFeatures
        self.preprocessing = PolynomialFeatures( degree, include_bias=False, interaction_only=interaction_only )
        Linear_regression.__init__( self, L1, L2 )

    def fit( self, X, y ) :
        X_poly = self.preprocessing.fit_transform( X )
        self.model.fit( X_poly, y )

    def predict( self, X ) :
        # fit_transform is used instead of transform so that prediction also works on a model
        # restored with set_params(), where the preprocessing has never been fitted; for
        # PolynomialFeatures the two are equivalent since fitting only records the input width.
        X_poly = self.preprocessing.fit_transform( X )
        return self.model.predict( X_poly )

    def __str__( self ) :
        return 'Polynomial regression of degree %i%s%s' % ( self._degree,
               ' with L1 and L2 regularization coefficients of %g and %g' % ( self._L1, self._L2 ) if self._L1 != 0 or self._L2 != 0 else '',
               ' and only interaction features' if self._interaction_only else '' )
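
# Usage sketch (illustrative, not part of the original API surface): a node model only needs
# fit/predict/get_params/set_params and __str__, so the wrappers above can also be used
# standalone, e.g.:
#   reg = Polynomial_regression( degree=2, L1=0.1 )
#   reg.fit( X_train, y_train )
#   y_hat = reg.predict( X_test )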

def CMA_search( X, cost_function, verbose=False, indentation=0 ) :
    # Search the oblique split coefficients with CMA-ES, starting from a random hyperplane
    # passing through the center of the data, with a step size scaled by its largest feature variance:
    data_center = np.mean( X, 0 )
    data_maxvar = np.max( np.var( X, 0 ) )
    x0 = np.random.randn( X.shape[1] )
    x0 = np.append( x0, x0.dot( data_center ) )
    es = cma.CMAEvolutionStrategy( x0, data_maxvar, { 'verbose': 0, 'verb_log': 0, 'verb_disp': 0, 'tolconditioncov': False } )
    with warnings.catch_warnings() :
        warnings.simplefilter( "ignore" )
        es.optimize( cost_function )
    if verbose :
        print( '%saCMA-ES results -> %i iterations, %i evaluations, termination status: %s' % ( ' '*indentation, es.result.iterations, es.result.evaluations, es.stop() ) )
    return es.result.xbest

class Model_tree :

    def __init__( self, oblique=True, max_depth=3, node_min_samples=1, model='linear', loss_tol=None, split_search='cma-es', margin_coef=0.01, search_grid=1, **model_options ) :
        '''
        Straight or oblique model tree regression.

        oblique = False: each split is made according to a scalar threshold on a single feature (straight tree).
        oblique = True: splits are defined by a linear combination of all features (a hyperplane in the feature space).
        max_depth: maximum depth of the tree.
        node_min_samples: minimum number of training samples required to constitute and train a terminal node.
        model: regression model to use at each terminal node.
        loss_tol: tolerance on the model loss below which a node stops being split.
        split_search: function used to search the oblique split coefficients. If split_search='cma-es', Covariance Matrix Adaptation Evolution Strategy is used (oblique trees only).
        margin_coef: coefficient encouraging the maximization of the margin between the split and the training samples (oblique trees only).
        search_grid: stride between the candidate thresholds evaluated during the first pass of the grid search (straight trees only).
        **model_options: options passed to the regression model of each terminal node.
        '''
        if model == 'linear' :
            self.model = lambda : Linear_regression( **model_options )
        elif model == 'polynomial' :
            self.model = lambda : Polynomial_regression( **model_options )
        else :
            self.model = lambda : model( **model_options )
        if split_search == 'cma-es' :
            self.split_search = CMA_search
        else :
            self.split_search = split_search
        self.oblique = oblique
        self.max_depth = max_depth
        self.node_min_samples = node_min_samples
        self.loss_tol = loss_tol
        self.margin_coef = margin_coef
        self.grid = search_grid
        self._root_node = None
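
    # For example (illustrative parameter values), a straight tree with quadratic leaf models
    # could be configured as:
    #   tree = Model_tree( oblique=False, max_depth=4, model='polynomial', degree=2, search_grid=5 )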

    def _get_split_distribution( self, X, split_params ) :
        # Return a boolean mask, True for the samples falling on the first side of the split:
        if X.ndim == 1 : X = X[np.newaxis,:]
        if self.oblique :
            # Oblique split: w.x >= b with w = split_params[:-1] and b = split_params[-1]
            assert len( split_params ) == X.shape[1] + 1
            boundary = split_params[0]*X[:,0] - split_params[-1]
            for i in range( 1, X.shape[1] ) :
                boundary += split_params[i]*X[:,i]
        else :
            # Straight split: threshold split_params[1] on the single feature split_params[0]
            assert len( split_params ) == 2 and isinstance( split_params[0], int )
            boundary = X[:,split_params[0]] - split_params[1]
        return boundary >= 0

    def _divide_and_fit( self, X, y, split_params, margin_coef=0.01 ) :
        assert len( y ) == X.shape[0]
        distribution = self._get_split_distribution( X, split_params )
        X_1 = X[distribution]
        y_1 = y[distribution]
        X_2 = X[~distribution]
        y_2 = y[~distribution]
        # If there are not enough samples on one side of the split, return the squared distance of
        # the split from the center of the samples, which keeps the cost function informative for the search:
        if len( y_1 ) < self.node_min_samples or len( y_2 ) < self.node_min_samples :
            if self.oblique :
                nosplit_loss = ( ( np.mean( X, 0 ).dot( split_params[:-1] ) - split_params[-1] )/np.linalg.norm( split_params[:-1] ) )**2
            else :
                nosplit_loss = ( np.mean( X[:,split_params[0]] ) - split_params[1] )**2
            results = { 'success': False,
                        'split_loss': nosplit_loss,
                        'margin_penalty': 0 }
            return results
        # Fit one model on each side of the split:
        model_1 = self.model()
        model_1.fit( X_1, y_1 )
        y_pred_1 = model_1.predict( X_1 )
        loss_1 = mean_squared_error( y_1, y_pred_1 )
        model_2 = self.model()
        model_2.fit( X_2, y_2 )
        y_pred_2 = model_2.predict( X_2 )
        loss_2 = mean_squared_error( y_2, y_pred_2 )
        # Loss of the split: the MSE of each side weighted by its number of samples:
        split_loss = ( len( y_1 )*loss_1 + len( y_2 )*loss_2 )/len( y )
        # Add the maximization of the margin:
        if self.oblique :
            margin_distance = min( abs( X.dot( split_params[:-1] ) - split_params[-1] ) )/np.linalg.norm( split_params[:-1] )
        else :
            #margin_distance = min( abs( X[:,split_params[0]] - split_params[1] ) )
            margin_distance = 0
        margin_penalty = -margin_coef*margin_distance
        results = { 'success': True,
                    'split_loss': split_loss,
                    'margin_penalty': margin_penalty,
                    'models': ( model_1, model_2 ),
                    'model_losses': ( loss_1, loss_2 ),
                    'X': ( X_1, X_2 ),
                    'y': ( y_1, y_2 ) }
        return results

    def _create_node( self, depth=0, model=None ) :
        if depth == 0 :
            self._node_count = 1
        else :
            self._node_count += 1
        node = { 'depth': depth,
                 'id': self._node_count,
                 'model': model,
                 'terminal': True }
        return node

    def _split_recursively( self, node, X, y, verbose=1, loss=None ) :
        # If the maximum depth is reached or there are not enough samples left:
        if node['depth'] >= self.max_depth or len( y ) < 2*self.node_min_samples :
            if loss is None :
                node['model'] = self.model()
                node['model'].fit( X, y )
                y_pred = node['model'].predict( X )
                loss = mean_squared_error( y, y_pred )
            if verbose :
                self._print_terminal_node( node['id'], node['depth'], 'max depth' if node['depth'] >= self.max_depth else 'samp limit', len( y ), loss )
            return
        if self.oblique :
            # Declaration of the cost function for the split:
            def cost_function( p ) :
                results = self._divide_and_fit( X, y, p, self.margin_coef )
                return results['split_loss'] + results['margin_penalty']
            # Search for the optimal split:
            split_params = self.split_search( X, cost_function, verbose > 1, 2 + 4*node['depth'] )
            # Normalize by the largest coefficient and impose the first coefficient to be positive:
            coef_max = max( np.abs( split_params ) )*( -1 if split_params[0] < 0 else 1 )
            split_params = ( np.array( split_params )/coef_max ).tolist()
        else :
            best_split_loss = None
            for feature in range( X.shape[1] ) :
                # Sort all values taken by the feature:
                feature_values = sorted( X[:,feature].tolist() )
                # Trim the limit values according to the minimum number of samples required:
                if self.node_min_samples > 1 :
                    feature_values = feature_values[self.node_min_samples-1:1-self.node_min_samples]
                # Remove duplicate values:
                feature_values = sorted( set( feature_values ) )
                # Identify all possible thresholds in the middle of each successive pair of feature values:
                threshold_list = [ ( feature_values[i+1] + feature_values[i] )/2 for i in range( len( feature_values ) - 1 ) ]
                # Record the best split seen:
                best_feature_loss = None
                best_feature_threshold = None
                for threshold in tqdm( threshold_list[::self.grid], desc='Node %i at depth %i, scanning feature [%i/%i]'
                                       % ( node['id'], node['depth'], feature, X.shape[1] - 1 ), leave=False, disable=verbose < 1 ) :
                    results = self._divide_and_fit( X, y, ( feature, threshold ) )
                    if best_split_loss is None or results['split_loss'] < best_split_loss :
                        best_split_loss = results['split_loss']
                        split_params = [ feature, threshold ]
                    if best_feature_loss is None or results['split_loss'] < best_feature_loss :
                        best_feature_loss = results['split_loss']
                        best_feature_threshold = threshold
                if self.grid > 1 and best_feature_threshold is not None :
                    # Search for a better split around the best threshold found by the grid search for the current feature:
                    best_index = threshold_list.index( best_feature_threshold )
                    refined_start_index = max( 0, best_index - self.grid + 1 )
                    threshold_list = threshold_list[refined_start_index:best_index+self.grid]
                    for threshold in tqdm( threshold_list, desc='Node %i at depth %i, scanning feature [%i/%i] (refined search)'
                                           % ( node['id'], node['depth'], feature, X.shape[1] - 1 ), leave=False, disable=verbose < 1 ) :
                        results = self._divide_and_fit( X, y, ( feature, threshold ) )
                        if results['split_loss'] < best_split_loss :
                            best_split_loss = results['split_loss']
                            split_params = [ feature, threshold ]
            if best_split_loss is None :
                if loss is None :
                    node['model'] = self.model()
                    node['model'].fit( X, y )
                    y_pred = node['model'].predict( X )
                    loss = mean_squared_error( y, y_pred )
                if verbose :
                    self._print_terminal_node( node['id'], node['depth'], 'no threshold', len( y ), loss )
                return
        # Get the data and models from the optimal split:
        results = self._divide_and_fit( X, y, split_params )
        if results['success'] :
            if verbose :
                # Proportion of samples in each split:
                prop1 = len( results['y'][0] )/len( y )*100
                prop2 = len( results['y'][1] )/len( y )*100
                print( ' %s\u21B3Split node (%i) at depth %i (%.f%%/%.f%% of %i samples, split loss: %g%s)'
                       % ( ' '*node['depth'], node['id'], node['depth'], prop1, prop2, len( y ), results['split_loss'],
                       ', margin penalty: %g' % results['margin_penalty'] if self.oblique else ', threshold on feature [%i]: %g' % tuple( split_params ) ) )
            node['terminal'] = False
            node['split_params'] = split_params
            node['model'] = None # Free the model resources
            node['children'] = []
            for child in range( 2 ) :
                node['children'].append( self._create_node( node['depth'] + 1, results['models'][child] ) )
                # If the loss is not below the tolerance, keep splitting:
                if self.loss_tol is None or results['model_losses'][child] > self.loss_tol :
                    self._split_recursively( node['children'][child], results['X'][child], results['y'][child], verbose, results['model_losses'][child] )
                elif verbose :
                    self._print_terminal_node( node['children'][child]['id'], node['depth'] + 1, 'loss tol', len( results['y'][child] ), results['model_losses'][child] )
        elif verbose :
            print( ' %s\u21B3/!\\ Node (%i) at depth %i: Could not find a suitable split (%i samples, split loss: %g)'
                   % ( ' '*node['depth'], node['id'], node['depth'], len( y ), results['split_loss'] ) )

    def _print_terminal_node( self, node_id, depth, termination_str, n_samples, loss ) :
        print( ' %s\u21B3Terminal node (%i) at depth %i (%s)%s %*i samples, model loss: %g'
               % ( ' '*depth, node_id, depth, termination_str, ' '*abs( 10 - len( termination_str ) ), self._nsamples_len, n_samples, loss ) )

    def fit( self, X, y, verbose=1, from_scratch=False ) :
        '''
        Build the tree based on the training data given by the array X of shape ( n samples, n features ).
        If X is a single-dimension array, it is considered to be n samples with a single feature.
        If verbose = 0, no output is given.
        If verbose = 1, only the outputs from the tree building are displayed.
        If verbose = 2, outputs from the split search are provided as well.
        If from_scratch = False and a tree already exists, building continues from each current terminal node.
        '''
        if X.ndim == 1 :
            X = X[:,np.newaxis]
        if verbose :
            print( self.__str__() )
            print( 'Model: %s' % str( self.model() ) )
        self._nsamples_len = len( str( len( y ) ) )
        if self._root_node is None or from_scratch :
            # Build the tree from the root:
            self._root_node = self._create_node()
            self._split_recursively( self._root_node, X, y, verbose )
        else :
            # Evaluate how the current terminal nodes divide the training data and perform on them:
            y_pred, node_ids = self.predict( X, return_node_id=True )
            terminal_nodes = [ self._get_node_from_id( node_id, self._root_node ) for node_id in sorted( set( node_ids ) ) ]
            # Continue building the tree from each current terminal node receiving samples:
            for node in terminal_nodes :
                indices = [ node_id == node['id'] for node_id in node_ids ]
                node_loss = mean_squared_error( y[indices], y_pred[indices] )
                if self.loss_tol is None or node_loss > self.loss_tol :
                    self._split_recursively( node, X[indices], y[indices], verbose, node_loss )
                elif verbose :
                    self._print_terminal_node( node['id'], node['depth'], 'loss tol', sum( indices ), node_loss )
            # Reset the order of all node IDs:
            self._reset_ids()
        if verbose :
            y_pred = self.predict( X )
            loss = mean_squared_error( y, y_pred )
            print( 'Final loss: %g, number of nodes: %i, number of non-zero parameters: %i, total number of parameters: %i'
                   % ( loss, self._node_count, *self.get_number_of_params() ) )

    def _get_node_from_id( self, node_id, current_node ) :
        if current_node['id'] == node_id :
            return current_node
        elif current_node['terminal'] :
            return None
        else :
            for child_node in current_node['children'] :
                node = self._get_node_from_id( node_id, child_node )
                if node is not None :
                    return node
            return None

    def _reset_ids( self, current_node=None ) :
        # Renumber all nodes in depth-first order:
        if current_node is None :
            current_node = self._root_node
            self._node_count = 1
        else :
            self._node_count += 1
        current_node['id'] = self._node_count
        if not current_node['terminal'] :
            for child_node in current_node['children'] :
                self._reset_ids( child_node )

    def _traverse_nodes_and_predict( self, node, x, return_node_id=False ) :
        if node['terminal'] :
            if x.ndim == 1 : x = x[np.newaxis,:]
            if return_node_id :
                return node['model'].predict( x ), node['id']
            else :
                return node['model'].predict( x )
        side = 0 if self._get_split_distribution( x, node['split_params'] ) else 1
        return self._traverse_nodes_and_predict( node['children'][side], x, return_node_id )

    def predict( self, X, return_node_id=False ) :
        '''
        Predict every output for the array X of shape ( n samples, n features ).
        If X is a single-dimension array, it is considered to be one sample of n features.
        If return_node_id is True, return a tuple containing first the list of predictions
        and second a list of the corresponding terminal node numbers.
        '''
        if self._root_node is None :
            raise RuntimeError( 'The tree has not been built yet' )
        if X.ndim == 1 :
            ret = self._traverse_nodes_and_predict( self._root_node, X[np.newaxis,:], return_node_id )
            if return_node_id :
                return ret[0].item(), ret[1]
            else :
                return ret.item()
        else :
            ret_list = [ self._traverse_nodes_and_predict( self._root_node, x, return_node_id ) for x in X ]
            if return_node_id :
                return np.hstack( [ ret[0] for ret in ret_list ] ), [ ret[1] for ret in ret_list ]
            else :
                return np.hstack( ret_list )

    def _traverse_nodes_and_collect_params( self, node, parent_id=None, tree_params=None ) :
        # Use None instead of a mutable default argument, which would otherwise persist across calls:
        if tree_params is None :
            tree_params = {}
        node_info = 'Node %i%s' % ( node['id'], ( ' at depth %i, child of node %i' % ( node['depth'], parent_id ) ) if parent_id is not None else ', root' )
        tree_params[node['id']] = { 'info': node_info }
        if node['terminal'] :
            tree_params[node['id']]['terminal'] = True
            tree_params[node['id']]['model params'] = node['model'].get_params()
            return tree_params
        else :
            tree_params[node['id']]['terminal'] = False
            tree_params[node['id']]['split params'] = node['split_params']
            for child in range( 2 ) :
                self._traverse_nodes_and_collect_params( node['children'][child], node['id'], tree_params )
            return tree_params

    def get_tree_params( self ) :
        '''Return all node parameters describing the tree as a dictionary'''
        if self._root_node is None :
            return None
        return self._traverse_nodes_and_collect_params( self._root_node )

    def _traverse_nodes_and_count_params( self, node ) :
        if node['terminal'] :
            params = node['model'].get_params()
            nonzero = sum( x != 0 for x in params )
            total = len( params )
        else :
            params = node['split_params']
            if self.oblique :
                # One coefficient is normalized to +/-1, so it is not counted as a free parameter:
                nonzero = sum( x != 0 for x in params ) - 1
                total = len( params ) - 1
            else :
                nonzero = 1
                total = 1
            for child in range( 2 ) :
                n, t = self._traverse_nodes_and_count_params( node['children'][child] )
                nonzero += n
                total += t
        return nonzero, total

    def get_number_of_params( self ) :
        '''Return a tuple with first the number of non-zero parameters and second the total number of parameters'''
        if self._root_node is None :
            return 0, 0
        return self._traverse_nodes_and_count_params( self._root_node )

    def _set_params_recursively( self, node, tree_params ) :
        if tree_params[node['id']]['terminal'] :
            node['model'] = self.model()
            node['model'].set_params( tree_params[node['id']]['model params'] )
        else :
            node['terminal'] = False
            node['split_params'] = tree_params[node['id']]['split params']
            node['children'] = []
            for child in range( 2 ) :
                node['children'].append( self._create_node( node['depth'] + 1 ) )
                self._set_params_recursively( node['children'][child], tree_params )

    def set_tree_params( self, tree_params ) :
        '''Build the tree according to a dictionary with the same structure as returned by get_tree_params()'''
        if tree_params is not None :
            self._root_node = self._create_node()
            self._set_params_recursively( self._root_node, tree_params )

    def save_tree_params( self, filename ) :
        '''Save the description of the tree and its parameters in a YAML file'''
        with open( filename + '.yaml', 'w' ) as f :
            f.write( '# %s\n' % self.__str__() )
            f.write( '# Model: %s\n' % str( self.model() ) )
            #yaml.dump( self.get_tree_params(), f, default_flow_style=False ) # PyYAML < 5.1
            yaml.dump( self.get_tree_params(), f, sort_keys=False )

    def load_tree_params( self, filename ) :
        '''Load the parameters and build the tree from a YAML file'''
        with open( filename + '.yaml', 'r' ) as f :
            #self.set_tree_params( yaml.load( f ) ) # PyYAML < 5.1
            self.set_tree_params( yaml.load( f, Loader=yaml.FullLoader ) )
        return self
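
    # Sketch of the resulting YAML structure (values are illustrative): each node ID maps to its
    # description, with 'split params' for split nodes and 'model params' for terminal nodes, e.g.:
    #   1:
    #     info: Node 1, root
    #     terminal: false
    #     split params: [ 1.0, -0.37, 0.21 ]
    #   2:
    #     info: Node 2 at depth 1, child of node 1
    #     terminal: true
    #     model params: [ 0.8, -1.2, 0.05 ]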

    def __str__( self ) :
        return '%s Model Tree (max depth: %i, node min samples: %i, loss tol: %s%s)' %\
               ( 'Oblique' if self.oblique else 'Straight', self.max_depth, self.node_min_samples,
               '%g' % self.loss_tol if self.loss_tol is not None else 'None',
               ', margin coef: %g' % self.margin_coef if self.oblique else ', search grid: %i' % self.grid )

    def diagram( self, filename=None, feature_names=None, float_format='{:g}' ) :
        '''Generate a diagram that depicts the tree and its parameters'''
        if self._root_node is None :
            raise RuntimeError( 'The tree has not been built yet' )
        from graphviz import Digraph
        g = Digraph( 'model_tree' )

        def build_diagram_recursively( node, parent_name=None, edge_label=None ) :
            # Define the unique identifier name for the current node:
            node_name = str( node['id'] )
            # Define the text in the block:
            if node['terminal'] :
                node_label = 'Model coefficients:\n'
                model_params = node['model'].get_params()
                if type( node['model'] ) is Linear_regression :
                    if feature_names is None :
                        feature_list = [ 'feature %i' % i for i in range( 1, len( model_params ) ) ]
                    else :
                        feature_list = feature_names.copy()
                    feature_list.append( 'bias' )
                    node_label += '\n'.join( [ ( '{}: ' + float_format ).format( f, p ) for f, p in zip( feature_list, model_params ) if p != 0 ] )
                else :
                    node_label += '\n'.join( map( float_format.format, model_params ) )
            else :
                if self.oblique :
                    if feature_names is None :
                        feature_list = [ 'feature %i' % i for i in range( 1, len( node['split_params'] ) ) ]
                    else :
                        feature_list = feature_names.copy()
                    line_format = '{} ' + float_format + ' \u00D7 {}'
                    features_and_params = zip( feature_list, node['split_params'] )
                    node_label = '\n'.join( [ line_format.format( '+' if p >= 0 else '-', abs( p ), f ) for f, p in features_and_params ] )
                    node_label += ( '\n\u2265 ' + float_format ).format( node['split_params'][-1] )
                else :
                    node_label = 'feature {}'.format( node['split_params'][0] + 1 ) if feature_names is None else feature_names[node['split_params'][0]]
                    node_label += ( ' \u2265 ' + float_format ).format( node['split_params'][1] )
            # Draw the block:
            g.node( node_name, node_label, shape=( 'box' if node['terminal'] else 'diamond' ) )
            if parent_name is not None :
                # Draw the edge with the parent node:
                g.edge( parent_name, node_name, edge_label )
            if not node['terminal'] :
                # Continue with the children nodes:
                for child in range( 2 ) :
                    build_diagram_recursively( node['children'][child], node_name, 'yes' if not child else 'no' )

        build_diagram_recursively( self._root_node )
        if filename is None :
            g.view( cleanup=True )
        else :
            # Save the diagram into a file whose format matches the provided extension:
            try :
                basename, extension = filename.rsplit( '.', 1 )
                g.format = extension
            except ValueError :
                basename = filename
            g.render( filename=basename, view=False, cleanup=True )
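
if __name__ == '__main__' :
    # Minimal usage sketch (not part of the original module): fit an oblique model tree on a
    # synthetic piecewise-linear dataset, then save, reload and compare the tree. The dataset
    # and the file name 'example_tree' are illustrative assumptions.
    np.random.seed( 0 )
    X = np.random.uniform( -1, 1, ( 200, 2 ) )
    y = np.where( X[:,0] + X[:,1] >= 0, 1 + X[:,0], -1 + 2*X[:,1] )
    tree = Model_tree( oblique=True, max_depth=2, node_min_samples=10 )
    tree.fit( X, y, verbose=1 )
    tree.save_tree_params( 'example_tree' )
    tree_copy = Model_tree( oblique=True ).load_tree_params( 'example_tree' )
    # The reloaded tree should reproduce the predictions of the original one:
    assert np.allclose( tree.predict( X ), tree_copy.predict( X ) )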