From 9fcb4d91c38c50fe1bbe8aa759ae10c6cdf28ad7 Mon Sep 17 00:00:00 2001 From: Romain Beauxis Date: Mon, 17 Jul 2023 23:52:16 -0500 Subject: [PATCH] * Add settings.log.recode and settings.log.recode.encoding. * Add settings.metadata.recode * Add doc. Fixes: #3231 --- doc/content/strings_encoding.md | 17 +++++++++ src/core/configure.ml | 31 ---------------- src/core/dune | 1 + src/core/request.ml | 15 ++++++-- src/core/tools/charset.ml | 45 ++--------------------- src/core/tools/charset_base.ml | 63 ++++++++++++++++++++++++++++++++ src/core/tools/log.ml | 64 +++++++++++++++++++++++++++++++-- src/runtime/main.ml | 3 ++ 8 files changed, 160 insertions(+), 79 deletions(-) create mode 100644 doc/content/strings_encoding.md create mode 100644 src/core/tools/charset_base.ml diff --git a/doc/content/strings_encoding.md b/doc/content/strings_encoding.md new file mode 100644 index 0000000000..ac87298326 --- /dev/null +++ b/doc/content/strings_encoding.md @@ -0,0 +1,17 @@ +# Strings encoding + +Liquidsoap operates internally using the UTF-8 string encoding. Most strings inside the application are converted +to UTF-8 whenever possible. Conversion is done using [camomile](https://github.com/ocaml-community/camomile) automatic +string encoding detection. If the conversion fails, the string is kept as-is. + +There are some exceptions, however. For instance, filenames and paths are not converted: if your system expects paths +to be in a different encoding than UTF-8 then we do need to keep strings representing files and paths in this encoding +to prevent errors. + +In general, you are advised to set the string encoding to UTF-8 on all systems running liquidsoap scripts for consistency +and clarity. + +However, if for some reason you need to tweak string encoding, these settings can be of use: + +- `settings.log.recode` and `settings.log.recode.encoding`: set the first one to `true` and the second one to the string encoding you would like log entries to be converted into. 
+- `settings.metadata.recode`: set to `false` to prevent metadata from being converted to UTF-8. diff --git a/src/core/configure.ml b/src/core/configure.ml index 5dc971a42d..9d0d0c4c8a 100644 --- a/src/core/configure.ml +++ b/src/core/configure.ml @@ -1,5 +1,3 @@ -let add_subst = Utils.add_subst - open Liquidsoap_lang include Build_config include Liquidsoap_paths @@ -27,11 +25,6 @@ let libs_versions () = if version = "?" then name else name ^ "=" ^ version) |> String.concat " " -let () = - Lifecycle.before_init (fun () -> - add_subst "" (rundir ()); - add_subst "" (logdir ())) - let restart = ref false let vendor = @@ -48,30 +41,6 @@ let conf_init = conf#plug "init" Dtools.Init.conf; Dtools.Init.conf -let conf_console = - Dtools.Conf.void ~p:(conf#plug "console") "Console configuration" - -let conf_colorize = - Dtools.Conf.string - ~p:(conf_console#plug "colorize") - ~d: - (match !Console.color_conf with - | `Auto -> "auto" - | `Always -> "always" - | `Never -> "never") - "Use color in console output when available. One of: \"always\", \"never\" \ - or \"auto\"." - -let () = - let log = Log.make ["console"] in - conf_colorize#on_change (function - | "auto" -> Console.color_conf := `Auto - | "always" -> Console.color_conf := `Always - | "never" -> Console.color_conf := `Never - | _ -> - log#important "Invalid color configuration, using default \"auto\""; - Console.color_conf := `Auto) - let conf_debug = Dtools.Conf.bool ~p:(conf#plug "debug") ~d:!Term.conf_debug "Debug language features such as type inference and reduction." 
diff --git a/src/core/dune b/src/core/dune index e2474da506..0ce3dc9d10 100644 --- a/src/core/dune +++ b/src/core/dune @@ -61,6 +61,7 @@ avi_format biquad_filter blank + charset_base charset child_support chord diff --git a/src/core/request.ml b/src/core/request.ml index 1c9c8cf629..35f4c29b5f 100644 --- a/src/core/request.ml +++ b/src/core/request.ml @@ -294,6 +294,11 @@ let conf_duration = not recommended: the proper way is to have a script precompute the \ \"duration\" metadata." +let conf_recode = + Dtools.Conf.bool + ~p:(conf_metadata_decoders#plug "recode") + ~d:true "Re-encode metadata strings in UTF-8" + (** Sys.file_exists doesn't make a difference between existing files and files without enough permissions to list their attributes, for example when they are in a directory without x permission. The two following functions allow a @@ -323,14 +328,18 @@ let read_metadata t = else if not (file_is_readable name) then log#important "Read permission denied for %s!" (Lang_string.quote_string name) - else + else ( + let convert = + if conf_recode#get then fun x -> Charset.convert x else fun x -> x + in List.iter (fun (_, resolver) -> try let ans = resolver ~metadata:indicator.metadata name in List.iter (fun (k, v) -> - let k = String.lowercase_ascii k in + let k = String.lowercase_ascii (convert k) in + let v = convert v in if conf_override_metadata#get || get_metadata t k = None then Hashtbl.replace indicator.metadata k v) ans; @@ -340,7 +349,7 @@ let read_metadata t = (string_of_float (duration ~metadata:indicator.metadata name)) with Not_found -> ()) with _ -> ()) - (get_decoders conf_metadata_decoders mresolvers)) + (get_decoders conf_metadata_decoders mresolvers))) let local_check t = let check_decodable ctype = diff --git a/src/core/tools/charset.ml b/src/core/tools/charset.ml index 3e9d4a5846..39aa270263 100644 --- a/src/core/tools/charset.ml +++ b/src/core/tools/charset.ml @@ -20,50 +20,9 @@ 
*****************************************************************************) -let conf_camomile = - Dtools.Conf.void - ~p:(Configure.conf#plug "camomile") - "Settings related to camomile library (for charset conversion)." +include Charset_base -let conf_path = - Dtools.Conf.string - ~p:(conf_camomile#plug "path") - ~d:(Liquidsoap_paths.camomile_dir ()) - "Directory where camomile files are to be found." - -let conf_encoding = - Dtools.Conf.list - ~p:(conf_camomile#plug "encodings") - ~d:["UTF-8"; "ISO-8859-1"; "UTF-16"] - "List of encodings to try for automatic encoding detection." - -module C = CamomileLib.CharEncoding.Configure (struct - let basedir = conf_path#get - let datadir = Filename.concat basedir "database" - let localedir = Filename.concat basedir "locales" - let charmapdir = Filename.concat basedir "charmaps" - let unimapdir = Filename.concat basedir "mappings" -end) - -include C - -exception Unknown_encoding of string -exception Unsupported_encoding of t - -let of_string s = try C.of_name s with Not_found -> raise (Unknown_encoding s) -let to_string = C.name_of -let custom_encoding = ref None - -let automatic_encoding () = - match !custom_encoding with - | Some e -> e - | None -> - let encs = conf_encoding#get in - let e = C.automatic "auto" (List.map of_string encs) C.utf8 in - custom_encoding := Some e; - e - -let log = Log.make ["camomile"] +let log = Log.make ["charset"] let recode_string ~fail ~in_enc ~out_enc s = try diff --git a/src/core/tools/charset_base.ml b/src/core/tools/charset_base.ml new file mode 100644 index 0000000000..d7062e7626 --- /dev/null +++ b/src/core/tools/charset_base.ml @@ -0,0 +1,63 @@ +(***************************************************************************** + + Liquidsoap, a programmable audio stream generator. 
+ Copyright 2003-2023 Savonet team + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details, fully stated in the COPYING + file at the root of the liquidsoap distribution. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + *****************************************************************************) + +let conf_charset = + Dtools.Conf.void + ~p:(Configure.conf#plug "charset") + "Settings related to charset conversion." + +let conf_path = + Dtools.Conf.string ~p:(conf_charset#plug "path") + ~d:(Liquidsoap_paths.camomile_dir ()) + "Directory where charset files are to be found." + +let conf_encoding = + Dtools.Conf.list + ~p:(conf_charset#plug "encodings") + ~d:["UTF-8"; "ISO-8859-1"; "UTF-16"] + "List of encodings to try for automatic encoding detection." 
+ +module C = CamomileLib.CharEncoding.Configure (struct + let basedir = conf_path#get + let datadir = Filename.concat basedir "database" + let localedir = Filename.concat basedir "locales" + let charmapdir = Filename.concat basedir "charmaps" + let unimapdir = Filename.concat basedir "mappings" +end) + +include C + +exception Unknown_encoding of string +exception Unsupported_encoding of t + +let of_string s = try C.of_name s with Not_found -> raise (Unknown_encoding s) +let to_string = C.name_of +let custom_encoding = ref None + +let automatic_encoding () = + match !custom_encoding with + | Some e -> e + | None -> + let encs = conf_encoding#get in + let e = C.automatic "auto" (List.map of_string encs) C.utf8 in + custom_encoding := Some e; + e diff --git a/src/core/tools/log.ml b/src/core/tools/log.ml index 0e17422a22..d18252a151 100644 --- a/src/core/tools/log.ml +++ b/src/core/tools/log.ml @@ -22,6 +22,20 @@ (** Logging functions. *) +let conf_recode = + Dtools.Conf.bool + ~p:(Dtools.Log.conf#plug "recode") + ~d:false + "Recode log entries. Source encoding is set using \ + `settings.charset.encodings`." + +let conf_encoding = + Dtools.Conf.string + ~p:(conf_recode#plug "encoding") + ~d:"UTF-8" "Encoding to recode log entries to." + +let recode = ref (fun s -> s) + type t = < active : int -> bool ; f : 'a. 
int -> ('a, unit, string, unit) format4 -> 'a @@ -35,11 +49,12 @@ type t = let make path : t = let colorize colors { Dtools.Log.time; label; level; log } = + let recode = !recode in { Dtools.Log.time; - label = Option.map (Console.colorize [`green]) label; + label = Option.map (fun s -> Console.colorize [`green] (recode s)) label; level; - log = Console.colorize colors log; + log = Console.colorize colors (recode log); } in let log = Dtools.Log.make path in @@ -85,3 +100,48 @@ let make path : t = method set_level lvl = (Dtools.Conf.as_int (Dtools.Log.conf_level#ut#path log#path))#set lvl end + +let () = + let log = make ["log"] in + let set_recode () = + recode := + match Charset_base.of_string conf_encoding#get with + | out_enc -> ( + fun s -> + let in_enc = Charset_base.automatic_encoding () in + try Charset_base.recode_string ~in_enc ~out_enc s + with exn -> + log#important "Failed to convert %S: unknown error %s" s + (Printexc.to_string exn); + s) + | exception _ -> + log#severe "Invalid target encoding for log conversion: %s" + conf_encoding#get; + fun s -> s + in + conf_recode#on_change (fun enabled -> (* Install the recoder when enabled; restore the identity recoder when disabled, otherwise a previously installed conversion would stay active. The else branch is parenthesised because [if .. then .. else a := b] parses as [(if ..) := b]. *) if enabled then set_recode () else (recode := (fun s -> s))); + conf_encoding#on_change (fun _ -> if conf_recode#get then set_recode ()) + +let conf_console = + Dtools.Conf.void ~p:(Configure.conf#plug "console") "Console configuration" + +let conf_colorize = + Dtools.Conf.string + ~p:(conf_console#plug "colorize") + ~d: + (match !Console.color_conf with + | `Auto -> "auto" + | `Always -> "always" + | `Never -> "never") + "Use color in console output when available. One of: \"always\", \"never\" \ + or \"auto\"." 
+ +let () = + let log = make ["console"] in + conf_colorize#on_change (function + | "auto" -> Console.color_conf := `Auto + | "always" -> Console.color_conf := `Always + | "never" -> Console.color_conf := `Never + | _ -> + log#important "Invalid color configuration, using default \"auto\""; + Console.color_conf := `Auto) diff --git a/src/runtime/main.ml b/src/runtime/main.ml index ef24725521..a27dd8872a 100644 --- a/src/runtime/main.ml +++ b/src/runtime/main.ml @@ -456,6 +456,9 @@ let () = Dtools.Init.conf_daemon_pidfile_path#set_d (Some "/