-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathpeeker_body.php
338 lines (304 loc) · 8.75 KB
/
peeker_body.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
<?php
/**
 * peeker_body
 *
 * Makes the body parts of an email into objects so that other classes
 * can act on them (access data, execute detectors, etc.).
 *
 * Class vars are mapped to the body parts: the HTML and PLAIN parts get
 * their own properties, while the other parts are stored in the parts
 * and file classes.
 *
 * Extends peeker_header so that this class has access to all of the
 * header data and can manipulate messages on the mail server using the
 * $this->peek_parent->resource property (the mail server resource).
 */
include_once('peeker_header.php');
class peeker_body extends peeker_header
{
    /**
     * Name of the class that the body uses for message parts.
     * get_body() only calls extract_parts() when this class exists.
     */
    public $parts_class = 'peeker_parts';

    /**
     * The whole raw (undecoded) body string as returned by imap_body().
     */
    public $body_string;

    /**
     * The plain-text and/or HTML body.
     * get_body() fills these directly for messages that are not multipart.
     * When this class is extended by the parts class, these properties
     * may instead have been filled with data from a multipart message.
     */
    public $PLAIN = '';
    public $HTML = '';

    /**
     * UNIX timestamp of the moment get_body() pulled the message
     * from the mail server into this object.
     */
    public $timestamp_pulled;

    /**
     * Constructor, connects to the parent (header) class.
     *
     * Both arguments are handed straight to the header constructor.
     *
     * @param object $peek_parent passed by reference to peeker_header;
     *                            read elsewhere in this class as
     *                            $this->peek_parent (presumably stored
     *                            by the header class - confirm there)
     * @param object $imap_h_obj  passed through to peeker_header
     *                            (presumably the IMAP header object -
     *                            confirm in peeker_header)
     */
    public function __construct(&$peek_parent, $imap_h_obj)
    {
        // pass the resource on to the header class
        parent::__construct($peek_parent, $imap_h_obj);
        $this->log_state('LOADING body class');
    }

    /**
     * Get all the email parts in the body.
     *
     * Fetches the message structure and the raw body from the mail
     * server, then either hands the parts to extract_parts() (multipart
     * message) or decodes the single body into $this->PLAIN / $this->HTML.
     *
     * NOTE: per the original author's notes, calling this causes gmail's
     * POP server to archive or delete the message (if the account is set
     * to archive or delete on POP access).
     *
     * @return bool FALSE if the message is already marked for deletion,
     *              TRUE otherwise (so this can act as a default detector)
     */
    public function get_body()
    {
        // Headers are retrieved first so body() is decoupled, and
        // messages() in the peek class might have deleted this message
        // while it is still in the object tree: check the delete mark.
        if ($this->get_mark_delete())
        {
            return FALSE;
        }

        // NOTE: this call removes the message from gmail's POP3 INBOX -
        // not by deleting it, but by making it effectively invisible
        // (depending on the gmail account's POP3 settings).
        $this->log_state('Fetching structure for email #'.$this->Msgno);
        $structure = @imap_fetchstructure($this->peek_parent->resource, $this->Msgno);

        // imap_fetchstructure() returns FALSE (not NULL) on failure, which
        // can happen when Msgno is 0, so test for "not an object" rather
        // than for NULL only.
        // TODO: build error handling, exception
        $has_structure = is_object($structure);
        if ( ! $has_structure)
        {
            $this->log_state('get_body() method $structure is NULL. MsgNo is: '.(int)$this->Msgno);
        }

        // Check for mail server errors here. Per the original notes this
        // clears the error stack and keeps PHP from posting errors about
        // badly formatted emails.
        // (should probably store the errors with the email in a db)
        $this->_check_imap_errors('imap_fetchstructure');

        // Pull out the raw email body for storage/export potential,
        // e.g. allowing mbox export.
        $this->log_state('Getting body for email #'.$this->Msgno);
        $this->body_string = @imap_body($this->peek_parent->resource, $this->Msgno);
        $this->_check_imap_errors('imap_body');

        if (isset($structure->parts) && count($structure->parts))
        {
            // Multipart message: extract every part of the email.
            // extract_parts() lives in a sub class, so make sure the
            // parts class is loaded before calling it.
            if (class_exists($this->parts_class))
            {
                $this->extract_parts($structure->parts);
            }
            else
            {
                $this->log_state(
                    'No parts class defined in body. imap_fetchstructure() parsing ( method extract_parts() ) failed.');
            }
        }
        else
        {
            // Not a multipart message. When no structure came back, fall
            // back to "no transfer encoding, unknown subtype" instead of
            // reading properties on a non-object.
            $encoding = ($has_structure && isset($structure->encoding)) ? (int)$structure->encoding : 0;
            $sub_type = ($has_structure && isset($structure->subtype)) ? strtoupper($structure->subtype) : '';

            // decode if base64 (3) or quoted-printable (4)
            if ($encoding === 3)
            {
                $body = base64_decode($this->body_string);
            }
            elseif ($encoding === 4)
            {
                $body = quoted_printable_decode($this->body_string);
            }
            else
            {
                $body = $this->body_string;
            }

            // A PLAIN or HTML body is written to the respective property.
            // To convert the part to UTF-8 (needed when encoded
            // iso-8859-1 HTML must be inserted into a UTF-8 db, where an
            // insert otherwise silently drops everything after the first
            // encoded char), pass the value through
            // $this->peek_parent->decode_mime() here. Turned off since
            // DEC 20101210 until needed.
            if ($sub_type === 'PLAIN')
            {
                $this->PLAIN = $body;
            }
            if ($sub_type === 'HTML')
            {
                $this->HTML = $body;
            }
        }

        // Represent the internal date as a UNIX timestamp. This is the
        // time the message was put into this class rather than
        // "received" (which should better be the datestamp for when the
        // message was accepted by the receiving SMTP server).
        $this->timestamp_pulled = time();

        // return TRUE to allow this to function as a
        // kind of default detector if needed
        return TRUE;
    }

    /**
     * Get the body part (raw text, undecoded).
     *
     * @return string|null|false whatever imap_body() returned in
     *                           get_body(); NULL before get_body() ran
     */
    public function get_body_string()
    {
        return $this->body_string;
    }

    /**
     * Get the PLAIN part (text-only).
     *
     * @return string
     */
    public function get_plain()
    {
        return $this->PLAIN;
    }

    /**
     * Get the HTML part exactly as stored.
     * Use get_html_filtered() to prefer a rewritten version.
     *
     * @return string
     */
    public function get_html()
    {
        return $this->HTML;
    }

    /**
     * Get the HTML part or, if there is a rewritten part
     * ($this->HTML_rewritten is set and not NULL), send that instead.
     *
     * NOTE(review): HTML_rewritten is not declared in this class; it is
     * presumably assigned by other code - confirm against callers.
     *
     * @return string
     */
    public function get_html_filtered()
    {
        if (isset($this->HTML_rewritten))
        {
            return $this->HTML_rewritten;
        }
        return $this->HTML;
    }

    /**
     * Get the date pulled timestamp.
     *
     * @return int|null UNIX timestamp set by get_body(), NULL before that
     */
    public function get_timestamp_pulled()
    {
        return $this->timestamp_pulled;
    }

    /**
     * Get the date pulled stamp: converts the internal timestamp
     * to a Y-m-d H:i:s mysql datetime string.
     *
     * NOTE(review): before get_body() has run, timestamp_pulled is NULL
     * and the result depends on how date() treats a NULL timestamp in
     * the running PHP version - confirm callers only use this afterwards.
     *
     * @return string
     */
    public function get_date_pulled()
    {
        return date('Y-m-d H:i:s', $this->timestamp_pulled);
    }

    // ------- detectors - return boolean ------- //

    /**
     * TRUE if pattern matches the PLAIN part (text-only).
     *
     * @param string $pattern PCRE pattern handed to preg_match()
     * @return bool FALSE also when preg_match() reports an error
     */
    public function preg_match_PLAIN($pattern)
    {
        return (bool)preg_match($pattern, $this->PLAIN);
    }

    /**
     * TRUE if pattern matches the HTML part.
     *
     * @param string $pattern PCRE pattern handed to preg_match()
     * @return bool FALSE also when preg_match() reports an error
     */
    public function preg_match_HTML($pattern)
    {
        return (bool)preg_match($pattern, $this->HTML);
    }

    /**
     * TRUE if there is a PLAIN part but no HTML part.
     *
     * @return bool
     */
    public function has_PLAIN_not_HTML()
    {
        // loose comparison against '' kept as in the original,
        // so a NULL property counts as empty too
        return $this->PLAIN != '' && $this->HTML == '';
    }

    /**
     * TRUE if the string is in the from address and the PLAIN part looks
     * like HTML that was sent without proper MIME types and boundaries.
     *
     * This is a brute force check on the text: the tag <html> (which is
     * how a rudimentary html doc can start) must begin within the first
     * 25 characters of the PLAIN part, and there must be no existing
     * HTML part that a fix would overwrite.
     *
     * NOTE: some HTML comes in through the body with =3D style MIME
     * encoded equals chars, etc... need to look into fixing those too.
     *
     * @param string $from_str string handed to the in_from() detector
     * @return bool
     */
    public function fix_MIME_from_sender($from_str)
    {
        // use a detector that is in the parent class
        if ( ! $this->in_from($from_str))
        {
            return FALSE;
        }

        // need a PLAIN part to inspect and no HTML part to overwrite
        if ($this->PLAIN === '' || $this->HTML !== '')
        {
            return FALSE;
        }

        // strpos() returns FALSE when the tag is absent, and FALSE < 25
        // evaluates to TRUE, so "not found" must be ruled out explicitly.
        $tag_position = strpos($this->PLAIN, '<html>');
        return $tag_position !== FALSE && $tag_position < 25;
    }

    //------- callbacks -------//

    /**
     * Takes the PLAIN property and stuffs the data into the HTML
     * property. Can be used to fix badly written emails and also to
     * force all emails to be HTML.
     *
     * @return void
     */
    public function put_PLAIN_into_HTML()
    {
        // see loreal email for example
        // this should be handled in a better way, perhaps known defects
        // in email could be tracked in a table and fixes applied before
        // the data is stored.
        // Also forces quoted-printable decoding in case the message is
        // encoded.
        // NOTE: shouldn't have to do this decoding... is it vestigial?
        $this->HTML = quoted_printable_decode($this->PLAIN);
    }

    /**
     * Wraps html and body tags around the HTML part so that other
     * functions can deal with tagged html.
     *
     * @return void
     */
    public function wrap_HTML_with_HTML_tags()
    {
        $this->HTML = '<html><body>'.$this->HTML.'</body></html>';
    }
}
// EOF