Lines 18-90
Link Here
|
18 |
* Audacious or using our public API to be a derived work. |
18 |
* Audacious or using our public API to be a derived work. |
19 |
*/ |
19 |
*/ |
20 |
|
20 |
|
|
|
21 |
/* |
22 |
* Note: This code used to do some normalization of strings: conversion to |
23 |
* UTF-8, conversion of the empty string to NULL, and (optionally) conversion |
24 |
* to uppercase. However, because such conversions can change the length of the |
25 |
* string, they can lead to a double-free. |
26 |
* |
27 |
* Consider: |
28 |
* |
29 |
* stringpool_get is called twice with the same 99-character ISO-8859-1 string. |
30 |
* The string is short enough to be cached, so stringpool_get returns a cached, |
31 |
* 101-character UTF-8 string. stringpool_unref is then called twice |
32 |
* with the cached string. Now that it has been converted, it is too long to be |
33 |
* cached, so stringpool_unref simply frees it, twice. |
34 |
* |
35 |
* Therefore, it is essential for stringpool_get to return a string that is |
36 |
* exactly the same as the one passed to it. |
37 |
* |
38 |
* --jlindgren |
39 |
*/ |
40 |
|
21 |
#include <glib.h> |
41 |
#include <glib.h> |
22 |
#include <mowgli.h> |
42 |
#include <mowgli.h> |
23 |
|
43 |
|
24 |
#include "audstrings.h" |
44 |
#include "audstrings.h" |
25 |
|
45 |
|
26 |
/* |
|
|
27 |
* Canonicalization mode: |
28 |
* |
29 |
* CASE_INSENSITIVE_CANON: Store pooled strings in the tree in normalized case. |
30 |
* This is slightly slower than without, but has a few benefits. |
31 |
* Specifically, case is normalized in the tuples, and memory usage is |
32 |
* reduced further (due to more dupes being killed). |
33 |
* |
34 |
* NO_CANON: Use fast binary-exact lookups. Performance is slightly faster, but |
35 |
* less dupe reduction is done. |
36 |
* |
37 |
* TODO: make this runtime configurable. |
38 |
*/ |
39 |
#define NO_CANON |
40 |
#undef CASE_INSENSITIVE_CANON |
41 |
|
42 |
#ifdef NO_CANON |
43 |
|
44 |
static void |
46 |
static void |
45 |
noopcanon(gchar *str) |
47 |
noopcanon(gchar *str) |
46 |
{ |
48 |
{ |
47 |
return; |
49 |
return; |
48 |
} |
50 |
} |
49 |
|
51 |
|
50 |
#else |
|
|
51 |
|
52 |
#ifdef XXX_UTF8_CANON |
53 |
|
54 |
static void |
55 |
strcasecanon(gchar *str) |
56 |
{ |
57 |
gchar *c, *up; |
58 |
|
59 |
c = g_utf8_casefold(str, -1); |
60 |
up = c; |
61 |
|
62 |
/* we have to ensure we don't overflow str. *grumble* */ |
63 |
while (*str && *up) |
64 |
*str++ = *up++; |
65 |
|
66 |
if (*str && !*up) |
67 |
*str = '\0'; |
68 |
|
69 |
g_free(c); |
70 |
} |
71 |
|
72 |
#else |
73 |
|
74 |
static void |
75 |
strcasecanon(gchar *str) |
76 |
{ |
77 |
while (*str) |
78 |
{ |
79 |
/* toupper() should ignore UTF-8 data. If not, make XXX_UTF8_CANON work. */ |
80 |
*str = g_ascii_toupper(*str); |
81 |
str++; |
82 |
} |
83 |
} |
84 |
|
85 |
#endif |
86 |
#endif |
87 |
|
88 |
/** Structure to handle string refcounting. */ |
52 |
/** Structure to handle string refcounting. */ |
89 |
typedef struct { |
53 |
typedef struct { |
90 |
gint refcount; |
54 |
gint refcount; |
Lines 97-103
static GStaticMutex stringpool_mutex = G
Link Here
|
97 |
static gboolean |
61 |
static gboolean |
98 |
stringpool_should_cache(const gchar *string, gsize maxlen) |
62 |
stringpool_should_cache(const gchar *string, gsize maxlen) |
99 |
{ |
63 |
{ |
100 |
const gchar *end = memchr(string, '\0', maxlen); |
64 |
const gchar *end = memchr(string, '\0', maxlen + 1); |
101 |
return end != NULL ? TRUE : FALSE; |
65 |
return end != NULL ? TRUE : FALSE; |
102 |
} |
66 |
} |
103 |
|
67 |
|
Lines 108-129
stringpool_get(const gchar *str)
Link Here
|
108 |
|
72 |
|
109 |
g_return_val_if_fail(str != NULL, NULL); |
73 |
g_return_val_if_fail(str != NULL, NULL); |
110 |
|
74 |
|
111 |
if (!*str) |
|
|
112 |
return NULL; |
113 |
|
114 |
if (!stringpool_should_cache(str, 100)) |
75 |
if (!stringpool_should_cache(str, 100)) |
115 |
return str_assert_utf8(str); |
76 |
return g_strdup(str); |
116 |
|
77 |
|
117 |
g_static_mutex_lock(&stringpool_mutex); |
78 |
g_static_mutex_lock(&stringpool_mutex); |
118 |
|
79 |
|
119 |
if (stringpool_tree == NULL) |
80 |
if (stringpool_tree == NULL) |
120 |
{ |
|
|
121 |
#ifdef NO_CANON |
122 |
stringpool_tree = mowgli_patricia_create(noopcanon); |
81 |
stringpool_tree = mowgli_patricia_create(noopcanon); |
123 |
#else |
|
|
124 |
stringpool_tree = mowgli_patricia_create(strcasecanon); |
125 |
#endif |
126 |
} |
127 |
|
82 |
|
128 |
if ((ps = mowgli_patricia_retrieve(stringpool_tree, str)) != NULL) |
83 |
if ((ps = mowgli_patricia_retrieve(stringpool_tree, str)) != NULL) |
129 |
{ |
84 |
{ |
Lines 135-141
stringpool_get(const gchar *str)
Link Here
|
135 |
|
90 |
|
136 |
ps = g_slice_new0(PooledString); |
91 |
ps = g_slice_new0(PooledString); |
137 |
ps->refcount++; |
92 |
ps->refcount++; |
138 |
ps->str = str_assert_utf8(str); |
93 |
ps->str = g_strdup(str); |
139 |
mowgli_patricia_add(stringpool_tree, str, ps); |
94 |
mowgli_patricia_add(stringpool_tree, str, ps); |
140 |
|
95 |
|
141 |
g_static_mutex_unlock(&stringpool_mutex); |
96 |
g_static_mutex_unlock(&stringpool_mutex); |
Lines 147-163
stringpool_unref(gchar *str)
Link Here
|
147 |
{ |
102 |
{ |
148 |
PooledString *ps; |
103 |
PooledString *ps; |
149 |
|
104 |
|
150 |
g_return_if_fail(stringpool_tree != NULL); |
105 |
g_return_if_fail(str != NULL); |
151 |
|
|
|
152 |
if (str == NULL) |
153 |
return; |
154 |
|
106 |
|
155 |
if (!stringpool_should_cache(str, 100)) |
107 |
if (!stringpool_should_cache(str, 100)) |
156 |
{ |
108 |
{ |
157 |
g_free (str); |
109 |
g_free(str); |
158 |
return; |
110 |
return; |
159 |
} |
111 |
} |
160 |
|
112 |
|
|
|
113 |
g_return_if_fail(stringpool_tree != NULL); |
114 |
|
161 |
g_static_mutex_lock(&stringpool_mutex); |
115 |
g_static_mutex_lock(&stringpool_mutex); |
162 |
|
116 |
|
163 |
ps = mowgli_patricia_retrieve(stringpool_tree, str); |
117 |
ps = mowgli_patricia_retrieve(stringpool_tree, str); |