Commit | Line | Data |
---|---|---|
00a9ab8c BM |
1 | /* |
2 | * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version 2 | |
7 | * of the License, or (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | |
16 | */ | |
17 | ||
18 | /************************************************************************/ | |
19 | /* */ | |
20 | /* PROJECT : exFAT & FAT12/16/32 File System */ | |
21 | /* FILE : nls.c */ | |
22 | /* PURPOSE : sdFAT NLS Manager */ | |
23 | /* */ | |
24 | /*----------------------------------------------------------------------*/ | |
25 | /* NOTES */ | |
26 | /* */ | |
27 | /* */ | |
28 | /************************************************************************/ | |
29 | #include <linux/string.h> | |
30 | #include <linux/nls.h> | |
31 | ||
32 | #include "sdfat.h" | |
33 | #include "core.h" | |
34 | ||
35 | /*----------------------------------------------------------------------*/ | |
36 | /* Global Variable Definitions */ | |
37 | /*----------------------------------------------------------------------*/ | |
38 | ||
39 | /*----------------------------------------------------------------------*/ | |
40 | /* Local Variable Definitions */ | |
41 | /*----------------------------------------------------------------------*/ | |
42 | ||
/*
 * Code units that nls_uni16s_to_sfn() replaces with '_' when it builds a
 * short name. The first row holds the ASCII characters, the second row the
 * corresponding full-width forms. The list ends with 0 so that it can be
 * scanned with nls_wstrchr().
 */
static u16 bad_dos_chars[] = {
	0x002B,		/* + */
	0x002C,		/* , */
	0x003B,		/* ; */
	0x003D,		/* = */
	0x005B,		/* [ */
	0x005D,		/* ] */
	0xFF0B,		/* full-width + */
	0xFF0C,		/* full-width , */
	0xFF1B,		/* full-width ; */
	0xFF1D,		/* full-width = */
	0xFF3B,		/* full-width [ */
	0xFF3D,		/* full-width ] */
	0		/* terminator */
};
49 | ||
/*
 * Code units that make a long (Unicode) name "lossy":
 *
 *	" * / : < > ? \ |
 *
 * Only the half-width (ASCII) forms are checked. "MS windows 7" supports
 * the full-width variants of these characters in names, so the full-width
 * entries are compiled out for compatibility (patch 1.2.0).
 *
 * The list ends with 0 so that it can be scanned with nls_wstrchr().
 */
static u16 bad_uni_chars[] = {
	0x0022,		/* double quote */
	0x002A,		/* asterisk */
	0x002F,		/* slash */
	0x003A,		/* colon */
	0x003C,		/* less-than */
	0x003E,		/* greater-than */
	0x003F,		/* question mark */
	0x005C,		/* backslash */
	0x007C,		/* vertical bar */
#if 0 /* allow full-width characters */
	0x201C, 0x201D, 0xFF0A, 0xFF0F, 0xFF1A,
	0xFF1C, 0xFF1E, 0xFF1F, 0xFF3C, 0xFF5C,
#endif
	0		/* terminator */
};
69 | ||
70 | /*----------------------------------------------------------------------*/ | |
71 | /* Local Function Declarations */ | |
72 | /*----------------------------------------------------------------------*/ | |
73 | static s32 convert_uni_to_ch(struct nls_table *nls, u16 uni, u8 *ch, s32 *lossy); | |
74 | static s32 convert_ch_to_uni(struct nls_table *nls, u8 *ch, u16 *uni, s32 *lossy); | |
75 | ||
76 | static u16 nls_upper(struct super_block *sb, u16 a) | |
77 | { | |
78 | FS_INFO_T *fsi = &(SDFAT_SB(sb)->fsi); | |
79 | ||
80 | if (SDFAT_SB(sb)->options.casesensitive) | |
81 | return a; | |
82 | if ((fsi->vol_utbl)[get_col_index(a)] != NULL) | |
83 | return (fsi->vol_utbl)[get_col_index(a)][get_row_index(a)]; | |
84 | else | |
85 | return a; | |
86 | } | |
87 | /*======================================================================*/ | |
88 | /* Global Function Definitions */ | |
89 | /*======================================================================*/ | |
/*
 * nls_wstrchr - locate a code unit in a 0-terminated UTF-16 string.
 * @str:   0-terminated string to scan
 * @wchar: code unit to look for
 *
 * Returns a pointer to the first element of @str that equals @wchar, or
 * NULL when @wchar does not occur before the terminator. The terminator
 * itself is never matched, so searching for 0 returns NULL.
 */
u16 *nls_wstrchr(u16 *str, u16 wchar)
{
	for ( ; *str; str++) {
		/* compare before advancing so the match itself is returned */
		if (*str == wchar)
			return str;
	}

	return NULL;
}
99 | ||
100 | s32 nls_cmp_sfn(struct super_block *sb, u8 *a, u8 *b) | |
101 | { | |
102 | return strncmp((void *)a, (void *)b, DOS_NAME_LENGTH); | |
103 | } | |
104 | ||
105 | s32 nls_cmp_uniname(struct super_block *sb, u16 *a, u16 *b) | |
106 | { | |
107 | s32 i; | |
108 | ||
109 | for (i = 0; i < MAX_NAME_LENGTH; i++, a++, b++) { | |
110 | if (nls_upper(sb, *a) != nls_upper(sb, *b)) | |
111 | return 1; | |
112 | if (*a == 0x0) | |
113 | return 0; | |
114 | } | |
115 | return 0; | |
116 | } | |
117 | ||
118 | #define CASE_LOWER_BASE (0x08) /* base is lower case */ | |
119 | #define CASE_LOWER_EXT (0x10) /* extension is lower case */ | |
120 | ||
121 | s32 nls_uni16s_to_sfn(struct super_block *sb, UNI_NAME_T *p_uniname, DOS_NAME_T *p_dosname, s32 *p_lossy) | |
122 | { | |
123 | s32 i, j, len, lossy = NLS_NAME_NO_LOSSY; | |
124 | u8 buf[MAX_CHARSET_SIZE]; | |
125 | u8 lower = 0, upper = 0; | |
126 | u8 *dosname = p_dosname->name; | |
127 | u16 *uniname = p_uniname->name; | |
128 | u16 *p, *last_period; | |
129 | struct nls_table *nls = SDFAT_SB(sb)->nls_disk; | |
130 | ||
131 | /* DOSNAME is filled with space */ | |
132 | for (i = 0; i < DOS_NAME_LENGTH; i++) | |
133 | *(dosname+i) = ' '; | |
134 | ||
135 | /* DOT and DOTDOT are handled by VFS layer */ | |
136 | ||
137 | /* search for the last embedded period */ | |
138 | last_period = NULL; | |
139 | for (p = uniname; *p; p++) { | |
140 | if (*p == (u16) '.') | |
141 | last_period = p; | |
142 | } | |
143 | ||
144 | i = 0; | |
145 | while (i < DOS_NAME_LENGTH) { | |
146 | if (i == 8) { | |
147 | if (last_period == NULL) | |
148 | break; | |
149 | ||
150 | if (uniname <= last_period) { | |
151 | if (uniname < last_period) | |
152 | lossy |= NLS_NAME_OVERLEN; | |
153 | uniname = last_period + 1; | |
154 | } | |
155 | } | |
156 | ||
157 | if (*uniname == (u16) '\0') { | |
158 | break; | |
159 | } else if (*uniname == (u16) ' ') { | |
160 | lossy |= NLS_NAME_LOSSY; | |
161 | } else if (*uniname == (u16) '.') { | |
162 | if (uniname < last_period) | |
163 | lossy |= NLS_NAME_LOSSY; | |
164 | else | |
165 | i = 8; | |
166 | } else if (nls_wstrchr(bad_dos_chars, *uniname)) { | |
167 | lossy |= NLS_NAME_LOSSY; | |
168 | *(dosname+i) = '_'; | |
169 | i++; | |
170 | } else { | |
171 | len = convert_uni_to_ch(nls, *uniname, buf, &lossy); | |
172 | ||
173 | if (len > 1) { | |
174 | if ((i >= 8) && ((i+len) > DOS_NAME_LENGTH)) | |
175 | break; | |
176 | ||
177 | if ((i < 8) && ((i+len) > 8)) { | |
178 | i = 8; | |
179 | continue; | |
180 | } | |
181 | ||
182 | lower = 0xFF; | |
183 | ||
184 | for (j = 0; j < len; j++, i++) | |
185 | *(dosname+i) = *(buf+j); | |
186 | } else { /* len == 1 */ | |
187 | if ((*buf >= 'a') && (*buf <= 'z')) { | |
188 | *(dosname+i) = *buf - ('a' - 'A'); | |
189 | ||
190 | lower |= (i < 8) ? | |
191 | CASE_LOWER_BASE : | |
192 | CASE_LOWER_EXT; | |
193 | } else if ((*buf >= 'A') && (*buf <= 'Z')) { | |
194 | *(dosname+i) = *buf; | |
195 | ||
196 | upper |= (i < 8) ? | |
197 | CASE_LOWER_BASE : | |
198 | CASE_LOWER_EXT; | |
199 | } else { | |
200 | *(dosname+i) = *buf; | |
201 | } | |
202 | i++; | |
203 | } | |
204 | } | |
205 | ||
206 | uniname++; | |
207 | } | |
208 | ||
209 | if (*dosname == 0xE5) | |
210 | *dosname = 0x05; | |
211 | if (*uniname != 0x0) | |
212 | lossy |= NLS_NAME_OVERLEN; | |
213 | ||
214 | if (upper & lower) | |
215 | p_dosname->name_case = 0xFF; | |
216 | else | |
217 | p_dosname->name_case = lower; | |
218 | ||
219 | if (p_lossy) | |
220 | *p_lossy = lossy; | |
221 | return i; | |
222 | } | |
223 | ||
224 | s32 nls_sfn_to_uni16s(struct super_block *sb, DOS_NAME_T *p_dosname, UNI_NAME_T *p_uniname) | |
225 | { | |
226 | s32 i = 0, j, n = 0; | |
227 | u8 buf[MAX_DOSNAME_BUF_SIZE]; | |
228 | u8 *dosname = p_dosname->name; | |
229 | u16 *uniname = p_uniname->name; | |
230 | struct nls_table *nls = SDFAT_SB(sb)->nls_disk; | |
231 | ||
232 | if (*dosname == 0x05) { | |
233 | *buf = 0xE5; | |
234 | i++; | |
235 | n++; | |
236 | } | |
237 | ||
238 | for ( ; i < 8; i++, n++) { | |
239 | if (*(dosname+i) == ' ') | |
240 | break; | |
241 | ||
242 | if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') && | |
243 | (p_dosname->name_case & CASE_LOWER_BASE)) | |
244 | *(buf+n) = *(dosname+i) + ('a' - 'A'); | |
245 | else | |
246 | *(buf+n) = *(dosname+i); | |
247 | } | |
248 | if (*(dosname+8) != ' ') { | |
249 | *(buf+n) = '.'; | |
250 | n++; | |
251 | } | |
252 | ||
253 | for (i = 8; i < DOS_NAME_LENGTH; i++, n++) { | |
254 | if (*(dosname+i) == ' ') | |
255 | break; | |
256 | ||
257 | if ((*(dosname+i) >= 'A') && (*(dosname+i) <= 'Z') && | |
258 | (p_dosname->name_case & CASE_LOWER_EXT)) | |
259 | *(buf+n) = *(dosname+i) + ('a' - 'A'); | |
260 | else | |
261 | *(buf+n) = *(dosname+i); | |
262 | } | |
263 | *(buf+n) = '\0'; | |
264 | ||
265 | i = j = 0; | |
266 | while (j < MAX_NAME_LENGTH) { | |
267 | if (*(buf+i) == '\0') | |
268 | break; | |
269 | ||
270 | i += convert_ch_to_uni(nls, (buf+i), uniname, NULL); | |
271 | ||
272 | uniname++; | |
273 | j++; | |
274 | } | |
275 | ||
276 | *uniname = (u16) '\0'; | |
277 | return j; | |
278 | } | |
279 | ||
280 | static s32 __nls_utf16s_to_vfsname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 buflen) | |
281 | { | |
282 | s32 len; | |
283 | const u16 *uniname = p_uniname->name; | |
284 | ||
285 | /* always len >= 0 */ | |
286 | len = utf16s_to_utf8s(uniname, MAX_NAME_LENGTH, UTF16_HOST_ENDIAN, | |
287 | p_cstring, buflen); | |
288 | p_cstring[len] = '\0'; | |
289 | return len; | |
290 | } | |
291 | ||
292 | static s32 __nls_vfsname_to_utf16s(struct super_block *sb, const u8 *p_cstring, | |
293 | const s32 len, UNI_NAME_T *p_uniname, s32 *p_lossy) | |
294 | { | |
295 | s32 i, unilen, lossy = NLS_NAME_NO_LOSSY; | |
296 | u16 upname[MAX_NAME_LENGTH+1]; | |
297 | u16 *uniname = p_uniname->name; | |
298 | ||
299 | BUG_ON(!len); | |
300 | ||
301 | unilen = utf8s_to_utf16s(p_cstring, len, UTF16_HOST_ENDIAN, | |
302 | (wchar_t *)uniname, MAX_NAME_LENGTH+2); | |
303 | if (unilen < 0) { | |
304 | MMSG("%s: failed to vfsname_to_utf16(err:%d) " | |
305 | "vfsnamelen:%d", __func__, unilen, len); | |
306 | return unilen; | |
307 | } | |
308 | ||
309 | if (unilen > MAX_NAME_LENGTH) { | |
310 | MMSG("%s: failed to vfsname_to_utf16(estr:ENAMETOOLONG) " | |
311 | "vfsnamelen:%d, unilen:%d>%d", | |
312 | __func__, len, unilen, MAX_NAME_LENGTH); | |
313 | return -ENAMETOOLONG; | |
314 | } | |
315 | ||
316 | p_uniname->name_len = (u8)(unilen & 0xFF); | |
317 | ||
318 | for (i = 0; i < unilen; i++) { | |
319 | if ((*uniname < 0x0020) || nls_wstrchr(bad_uni_chars, *uniname)) | |
320 | lossy |= NLS_NAME_LOSSY; | |
321 | ||
322 | *(upname+i) = nls_upper(sb, *uniname); | |
323 | uniname++; | |
324 | } | |
325 | ||
326 | *uniname = (u16)'\0'; | |
327 | p_uniname->name_len = unilen; | |
328 | p_uniname->name_hash = calc_chksum_2byte((void *) upname, | |
329 | unilen << 1, 0, CS_DEFAULT); | |
330 | ||
331 | if (p_lossy) | |
332 | *p_lossy = lossy; | |
333 | ||
334 | return unilen; | |
335 | } | |
336 | ||
337 | static s32 __nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *p_uniname, u8 *p_cstring, s32 buflen) | |
338 | { | |
339 | s32 i, j, len, out_len = 0; | |
340 | u8 buf[MAX_CHARSET_SIZE]; | |
341 | const u16 *uniname = p_uniname->name; | |
342 | struct nls_table *nls = SDFAT_SB(sb)->nls_io; | |
343 | ||
344 | i = 0; | |
345 | while ((i < MAX_NAME_LENGTH) && (out_len < (buflen-1))) { | |
346 | if (*uniname == (u16)'\0') | |
347 | break; | |
348 | ||
349 | len = convert_uni_to_ch(nls, *uniname, buf, NULL); | |
350 | ||
351 | if (out_len + len >= buflen) | |
352 | len = (buflen - 1) - out_len; | |
353 | ||
354 | out_len += len; | |
355 | ||
356 | if (len > 1) { | |
357 | for (j = 0; j < len; j++) | |
358 | *p_cstring++ = (s8) *(buf+j); | |
359 | } else { /* len == 1 */ | |
360 | *p_cstring++ = (s8) *buf; | |
361 | } | |
362 | ||
363 | uniname++; | |
364 | i++; | |
365 | } | |
366 | ||
367 | *p_cstring = '\0'; | |
368 | return out_len; | |
369 | } | |
370 | ||
371 | static s32 __nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring, | |
372 | const s32 len, UNI_NAME_T *p_uniname, s32 *p_lossy) | |
373 | { | |
374 | s32 i, unilen, lossy = NLS_NAME_NO_LOSSY; | |
375 | u16 upname[MAX_NAME_LENGTH+1]; | |
376 | u16 *uniname = p_uniname->name; | |
377 | struct nls_table *nls = SDFAT_SB(sb)->nls_io; | |
378 | ||
379 | BUG_ON(!len); | |
380 | ||
381 | i = unilen = 0; | |
382 | while ((unilen < MAX_NAME_LENGTH) && (i < len)) { | |
383 | i += convert_ch_to_uni(nls, (u8 *)(p_cstring+i), uniname, &lossy); | |
384 | ||
385 | if ((*uniname < 0x0020) || nls_wstrchr(bad_uni_chars, *uniname)) | |
386 | lossy |= NLS_NAME_LOSSY; | |
387 | ||
388 | *(upname+unilen) = nls_upper(sb, *uniname); | |
389 | ||
390 | uniname++; | |
391 | unilen++; | |
392 | } | |
393 | ||
394 | if (*(p_cstring+i) != '\0') | |
395 | lossy |= NLS_NAME_OVERLEN; | |
396 | ||
397 | *uniname = (u16)'\0'; | |
398 | p_uniname->name_len = unilen; | |
399 | p_uniname->name_hash = | |
400 | calc_chksum_2byte((void *) upname, unilen<<1, 0, CS_DEFAULT); | |
401 | ||
402 | if (p_lossy) | |
403 | *p_lossy = lossy; | |
404 | ||
405 | return unilen; | |
406 | } | |
407 | ||
408 | s32 nls_uni16s_to_vfsname(struct super_block *sb, UNI_NAME_T *uniname, u8 *p_cstring, s32 buflen) | |
409 | { | |
410 | if (SDFAT_SB(sb)->options.utf8) | |
411 | return __nls_utf16s_to_vfsname(sb, uniname, p_cstring, buflen); | |
412 | ||
413 | return __nls_uni16s_to_vfsname(sb, uniname, p_cstring, buflen); | |
414 | } | |
415 | ||
416 | s32 nls_vfsname_to_uni16s(struct super_block *sb, const u8 *p_cstring, const s32 len, UNI_NAME_T *uniname, s32 *p_lossy) | |
417 | { | |
418 | if (SDFAT_SB(sb)->options.utf8) | |
419 | return __nls_vfsname_to_utf16s(sb, p_cstring, len, uniname, p_lossy); | |
420 | return __nls_vfsname_to_uni16s(sb, p_cstring, len, uniname, p_lossy); | |
421 | } | |
422 | ||
423 | /*======================================================================*/ | |
424 | /* Local Function Definitions */ | |
425 | /*======================================================================*/ | |
426 | ||
427 | static s32 convert_ch_to_uni(struct nls_table *nls, u8 *ch, u16 *uni, s32 *lossy) | |
428 | { | |
429 | int len; | |
430 | ||
431 | *uni = 0x0; | |
432 | ||
433 | if (ch[0] < 0x80) { | |
434 | *uni = (u16) ch[0]; | |
435 | return 1; | |
436 | } | |
437 | ||
438 | len = nls->char2uni(ch, MAX_CHARSET_SIZE, uni); | |
439 | if (len < 0) { | |
440 | /* conversion failed */ | |
441 | DMSG("%s: fail to use nls\n", __func__); | |
442 | if (lossy != NULL) | |
443 | *lossy |= NLS_NAME_LOSSY; | |
444 | *uni = (u16) '_'; | |
445 | if (!strcmp(nls->charset, "utf8")) | |
446 | return 1; | |
447 | return 2; | |
448 | } | |
449 | ||
450 | return len; | |
451 | } /* end of convert_ch_to_uni */ | |
452 | ||
453 | static s32 convert_uni_to_ch(struct nls_table *nls, u16 uni, u8 *ch, s32 *lossy) | |
454 | { | |
455 | int len; | |
456 | ||
457 | ch[0] = 0x0; | |
458 | ||
459 | if (uni < 0x0080) { | |
460 | ch[0] = (u8) uni; | |
461 | return 1; | |
462 | } | |
463 | ||
464 | len = nls->uni2char(uni, ch, MAX_CHARSET_SIZE); | |
465 | if (len < 0) { | |
466 | /* conversion failed */ | |
467 | DMSG("%s: fail to use nls\n", __func__); | |
468 | if (lossy != NULL) | |
469 | *lossy |= NLS_NAME_LOSSY; | |
470 | ch[0] = '_'; | |
471 | return 1; | |
472 | } | |
473 | ||
474 | return len; | |
475 | ||
476 | } /* end of convert_uni_to_ch */ | |
477 | ||
478 | /* end of nls.c */ |