1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
|
commit ed0adb6e4e873eca15b4765035d3c89888c74a72
Author: Stephen Watson <stephen@kerofin.demon.co.uk>
Date: Mon Jan 21 23:33:13 2008 +0000
MIME magic lookup now returns text/plain for certain unknown file types.
Restored the code, lost in the update to the newer XDG MIME code, which checks
unknown files to see if they are text.
Modifications to the XDG code are tagged with ROX: comments.
diff --git a/ROX-Filer/src/xdgmime.c b/ROX-Filer/src/xdgmime.c
index 133282d..51d38d6 100644
--- a/ROX-Filer/src/xdgmime.c
+++ b/ROX-Filer/src/xdgmime.c
@@ -65,6 +65,7 @@ XdgMimeCache **_xdg_mime_caches = NULL;
static int n_caches = 0;
const char xdg_mime_type_unknown[] = "application/octet-stream";
+const char xdg_mime_type_unknown_text[] = "text/plain"; /* ROX: */
enum
diff --git a/ROX-Filer/src/xdgmime.h b/ROX-Filer/src/xdgmime.h
index 82b5585..52d1524 100644
--- a/ROX-Filer/src/xdgmime.h
+++ b/ROX-Filer/src/xdgmime.h
@@ -74,7 +74,11 @@ typedef void (*XdgMimeDestroy) (void *user_data);
#endif
extern const char xdg_mime_type_unknown[];
-#define XDG_MIME_TYPE_UNKNOWN xdg_mime_type_unknown
+#define XDG_MIME_TYPE_UNKNOWN xdg_mime_type_unknown
+
+/* ROX: */
+extern const char xdg_mime_type_unknown_text[];
+#define XDG_MIME_TYPE_UNKNOWN_TEXT xdg_mime_type_unknown_text
const char *xdg_mime_get_mime_type_for_data (const void *data,
size_t len);
diff --git a/ROX-Filer/src/xdgmimemagic.c b/ROX-Filer/src/xdgmimemagic.c
index 892688b..d1d04b6 100644
--- a/ROX-Filer/src/xdgmimemagic.c
+++ b/ROX-Filer/src/xdgmimemagic.c
@@ -39,6 +39,9 @@
#include <errno.h>
#include <limits.h>
+/* ROX: */
+#include <glib.h>
+
#ifndef FALSE
#define FALSE (0)
#endif
@@ -654,6 +657,83 @@ _xdg_mime_magic_get_buffer_extents (XdgMimeMagic *mime_magic)
return mime_magic->max_extent;
}
+static gboolean _rox_buffer_looks_like_text (const void *data,
+ const size_t len)
+{
+ gchar *end;
+
+ if (g_utf8_validate (data, len, (const gchar**)&end))
+ {
+ /* g_utf8_validate allows control characters */
+ int i;
+ for (i = 0; i < len; i++)
+ {
+ unsigned char c = ((const guchar *) data)[i];
+ if (c < 32 && c != '\r' && c != '\n' && c != '\t')
+ return FALSE;
+ }
+ return TRUE;
+
+ } else {
+ /* Check whether the string was truncated in the middle of
+ * a valid UTF8 char, or if we really have an invalid
+ * UTF8 string
+ */
+ gint remaining_bytes = len;
+
+ remaining_bytes -= (end-((gchar*)data));
+
+ if (g_utf8_get_char_validated(end, remaining_bytes) == -2)
+ return TRUE;
+#if defined(HAVE_WCTYPE_H) && defined (HAVE_MBRTOWC)
+ else {
+ size_t wlen;
+ wchar_t wc;
+ gchar *src, *end;
+ mbstate_t state;
+
+ src = data;
+ end = data+len;
+
+ memset (&state, 0, sizeof (state));
+ while (src < end) {
+ /* Don't allow embedded zeros in textfiles */
+ if (*src == 0)
+ return FALSE;
+
+ wlen = mbrtowc(&wc, src, end - src, &state);
+
+ if (wlen == (size_t)(-1)) {
+ /* Illegal mb sequence */
+ return FALSE;
+ }
+
+ if (wlen == (size_t)(-2)) {
+ /* No complete mb char before end
+ * Probably a cut off char which is ok */
+ return TRUE;
+ }
+
+ if (wlen == 0) {
+ /* Don't allow embedded zeros in textfiles */
+ return FALSE;
+ }
+
+ if (!iswspace (wc) && !iswprint(wc)) {
+ /* Not a printable or whitspace
+ * Probably not a text file */
+ return FALSE;
+ }
+
+ src += wlen;
+ }
+ return TRUE;
+ }
+#endif /* defined(HAVE_WCTYPE_H) && defined (HAVE_MBRTOWC) */
+ }
+ return FALSE;
+}
+
const char *
_xdg_mime_magic_lookup_data (XdgMimeMagic *mime_magic,
const void *data,
@@ -708,6 +788,12 @@ _xdg_mime_magic_lookup_data (XdgMimeMagic *mime_magic,
}
}
+ if (mime_type == NULL)
+ {
+ if(_rox_buffer_looks_like_text(data, len))
+ mime_type = XDG_MIME_TYPE_UNKNOWN_TEXT;
+ }
+
return mime_type;
}
|