diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 2416b03ff2837..137f16feee084 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -430,6 +430,7 @@ Description: Show status of f2fs superblock in real time. 0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP 0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted 0x2000 SBI_IS_RESIZEFS resizefs is in process + 0x4000 SBI_IS_FREEZING freezing is in process ====== ===================== ================================= What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7123524a86b8b..bdc733c2561d1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3485,8 +3485,7 @@ difficult since unequal pointers can no longer be compared. However, if this command-line option is specified, then all normal pointers will have their true - value printed. Pointers printed via %pK may still be - hashed. This option should only be specified when + value printed. This option should only be specified when debugging the kernel. Please do not use on production kernels. @@ -4356,6 +4355,12 @@ fully seed the kernel's CRNG. Default is controlled by CONFIG_RANDOM_TRUST_CPU. + random.trust_bootloader={on,off} + [KNL] Enable or disable trusting the use of + a seed passed by the bootloader (if available) to + fully seed the kernel's CRNG. Default is controlled + by CONFIG_RANDOM_TRUST_BOOTLOADER. + randomize_kstack_offset= [KNL] Enable or disable kernel stack offset randomization, which provides roughly 5 bits of diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst index c21b5823f1261..2cf5bae620367 100644 --- a/Documentation/admin-guide/mm/index.rst +++ b/Documentation/admin-guide/mm/index.rst @@ -32,6 +32,7 @@ the Linux memory management. idle_page_tracking ksm memory-hotplug + multigen_lru nommu-mmap numa_memory_policy numaperf diff --git a/Documentation/admin-guide/mm/multigen_lru.rst b/Documentation/admin-guide/mm/multigen_lru.rst new file mode 100644 index 0000000000000..4ea6a801dc56d --- /dev/null +++ b/Documentation/admin-guide/mm/multigen_lru.rst @@ -0,0 +1,146 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +Multi-Gen LRU +============= +Quick start +=========== +Build the kernel with the following configurations. + +* ``CONFIG_LRU_GEN=y`` +* ``CONFIG_LRU_GEN_ENABLED=y`` + +All set! + +Runtime options +=============== +``/sys/kernel/mm/lru_gen/`` contains stable ABIs described in the +following subsections. + +Kill switch +----------- +``enable`` accepts different values to enable or disable the +following components. The default value of this file depends on +``CONFIG_LRU_GEN_ENABLED``. All the components should be enabled +unless some of them have unforeseen side effects. Writing to +``enable`` has no effect when a component is not supported by the +hardware, and valid values will be accepted even when the main switch +is off. + +====== =============================================================== +Values Components +====== =============================================================== +0x0001 The main switch for the multi-gen LRU. +0x0002 Clearing the accessed bit in leaf page table entries in large + batches, when the MMU sets it (e.g., on x86). This behavior can + theoretically worsen lock contention (mmap_lock).
If it is + disabled, the multi-gen LRU will suffer a minor performance + degradation. +0x0004 Clearing the accessed bit in non-leaf page table entries as + well, when the MMU sets it (e.g., on x86). This behavior was not + verified on x86 varieties other than Intel and AMD. If it is + disabled, the multi-gen LRU will suffer a negligible + performance degradation. +[yYnN] Apply to all the components above. +====== =============================================================== + +E.g., +:: + + echo y >/sys/kernel/mm/lru_gen/enabled + cat /sys/kernel/mm/lru_gen/enabled + 0x0007 + echo 5 >/sys/kernel/mm/lru_gen/enabled + cat /sys/kernel/mm/lru_gen/enabled + 0x0005 + +Thrashing prevention +-------------------- +Personal computers are more sensitive to thrashing because it can +cause janks (lags when rendering UI) and negatively impact user +experience. The multi-gen LRU offers thrashing prevention to the +majority of laptop and desktop users who do not have ``oomd``. + +Users can write ``N`` to ``min_ttl_ms`` to prevent the working set of +``N`` milliseconds from getting evicted. The OOM killer is triggered +if this working set cannot be kept in memory. In other words, this +option works as an adjustable pressure relief valve, and when open, it +terminates applications that are hopefully not being used. + +Based on the average human detectable lag (~100ms), ``N=1000`` usually +eliminates intolerable janks due to thrashing. Larger values like +``N=3000`` make janks less noticeable at the risk of premature OOM +kills. + +Experimental features +===================== +``/sys/kernel/debug/lru_gen`` accepts commands described in the +following subsections. Multiple command lines are supported, and so is +concatenation with the delimiters ``,`` and ``;``. + +``/sys/kernel/debug/lru_gen_full`` provides additional stats for +debugging. ``CONFIG_LRU_GEN_STATS=y`` keeps historical stats from +evicted generations in this file. + +Working set estimation +---------------------- +Working set estimation measures how much memory an application +requires in a given time interval, and it is usually done with little +impact on the performance of the application. E.g., data centers want +to optimize job scheduling (bin packing) to improve memory +utilization. When a new job comes in, the job scheduler needs to find +out whether each server it manages can allocate a certain amount of +memory for this new job before it can pick a candidate. To do so, this +job scheduler needs to estimate the working sets of the existing jobs. + +When it is read, ``lru_gen`` returns a histogram of numbers of pages +accessed over different time intervals for each memcg and node. +``MAX_NR_GENS`` decides the number of bins for each histogram. +:: + + memcg memcg_id memcg_path + node node_id + min_gen_nr age_in_ms nr_anon_pages nr_file_pages + ... + max_gen_nr age_in_ms nr_anon_pages nr_file_pages + +Each generation contains an estimated number of pages that have been +accessed within ``age_in_ms`` non-cumulatively. E.g., ``min_gen_nr`` +contains the coldest pages and ``max_gen_nr`` contains the hottest +pages, since ``age_in_ms`` of the former is the largest and that of +the latter is the smallest. + +Users can write ``+ memcg_id node_id max_gen_nr +[can_swap [full_scan]]`` to ``lru_gen`` to create a new generation +``max_gen_nr+1``. ``can_swap`` defaults to the swap setting and, if it +is set to ``1``, it forces the scan of anon pages when swap is off.
+``full_scan`` defaults to ``1`` and, if it is set to ``0``, it reduces +the overhead as well as the coverage when scanning page tables. + +A typical use case is that a job scheduler writes to ``lru_gen`` at a +certain time interval to create new generations, and it ranks the +servers it manages based on the sizes of their cold memory defined by +this time interval. + +Proactive reclaim +----------------- +Proactive reclaim induces memory reclaim when there is no memory +pressure and usually targets cold memory only. E.g., when a new job +comes in, the job scheduler wants to proactively reclaim memory on the +server it has selected to improve the chance of successfully landing +this new job. + +Users can write ``- memcg_id node_id min_gen_nr [swappiness +[nr_to_reclaim]]`` to ``lru_gen`` to evict generations less than or +equal to ``min_gen_nr``. Note that ``min_gen_nr`` should be less than +``max_gen_nr-1`` as ``max_gen_nr`` and ``max_gen_nr-1`` are not fully +aged and therefore cannot be evicted. ``swappiness`` overrides the +default value in ``/proc/sys/vm/swappiness``. ``nr_to_reclaim`` limits +the number of pages to evict. + +A typical use case is that a job scheduler writes to ``lru_gen`` +before it tries to land a new job on a server, and if it fails to +materialize the cold memory without impacting the existing jobs on +this server, it retries on the next server according to the ranking +result obtained from the working set estimation step described +earlier. diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index d359bcfadd39a..0f86e9f931293 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -795,6 +795,7 @@ bit 1 print system memory info bit 2 print timer info bit 3 print locks info if ``CONFIG_LOCKDEP`` is on bit 4 print ftrace buffer +bit 5 print all printk messages in buffer ===== ============================================ So for example to print tasks and memory info on panic, user can:: diff --git a/Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml b/Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml index 87992db389b28..3698b4b0900f5 100644 --- a/Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml +++ b/Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml @@ -92,6 +92,10 @@ properties: description: AMS Controller register space maxItems: 1 + clocks: + items: + - description: AMS reference clock + ranges: description: Maps the child address space for PS and/or PL. @@ -181,12 +185,15 @@ properties: required: - compatible - reg + - clocks - ranges additionalProperties: false examples: - | + #include <dt-bindings/clock/xlnx-zynqmp-clk.h> + bus { #address-cells = <2>; #size-cells = <2>; @@ -196,6 +203,7 @@ examples: interrupt-parent = <&gic>; interrupts = <0 56 4>; reg = <0x0 0xffa50000 0x0 0x800>; + clocks = <&zynqmp_clk AMS_REF>; #address-cells = <1>; #size-cells = <1>; #io-channel-cells = <1>; diff --git a/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml b/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml index 85a8877c2f387..1e2df8cf2937b 100644 --- a/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml +++ b/Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml @@ -49,7 +49,8 @@ properties: description: Definition of the regulator used for the VDDD power supply.
port: - $ref: /schemas/graph.yaml#/properties/port + $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false properties: endpoint: @@ -68,8 +69,11 @@ properties: - const: 1 - const: 2 + link-frequencies: true + required: - data-lanes + - link-frequencies required: - compatible diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml index 3a82b0b27fa0a..4fca71f343109 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml @@ -88,10 +88,9 @@ allOf: - mediatek,mt2701-smi-common then: properties: - clock: - items: - minItems: 3 - maxItems: 3 + clocks: + minItems: 3 + maxItems: 3 clock-names: items: - const: apb @@ -108,10 +107,9 @@ allOf: required: - mediatek,smi properties: - clock: - items: - minItems: 3 - maxItems: 3 + clocks: + minItems: 3 + maxItems: 3 clock-names: items: - const: apb @@ -133,10 +131,9 @@ allOf: then: properties: - clock: - items: - minItems: 4 - maxItems: 4 + clocks: + minItems: 4 + maxItems: 4 clock-names: items: - const: apb @@ -146,10 +143,9 @@ allOf: else: # for gen2 HW that don't have gals properties: - clock: - items: - minItems: 2 - maxItems: 2 + clocks: + minItems: 2 + maxItems: 2 clock-names: items: - const: apb diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml index eaeff1ada7f89..c5c32c9100457 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml @@ -79,11 +79,11 @@ allOf: then: properties: - clock: - items: - minItems: 3 - maxItems: 3 + clocks: + minItems: 2 + maxItems: 3 clock-names: + minItems: 2 items: - const: apb - const: smi @@ -91,10 +91,9 @@ allOf: else: properties: - clock: - items: - minItems: 2 - maxItems: 2 + clocks: + minItems: 2 + maxItems: 2 clock-names: items: - const: apb @@ -108,7 +107,6 @@ allOf: - mediatek,mt2701-smi-larb - mediatek,mt2712-smi-larb - mediatek,mt6779-smi-larb - - mediatek,mt8167-smi-larb - mediatek,mt8192-smi-larb - mediatek,mt8195-smi-larb diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml index bd217e6f5018a..5cd144a9ec992 100644 --- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml +++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml @@ -55,7 +55,7 @@ patternProperties: properties: reg: description: - Contains the native Ready/Busy IDs. + Contains the chip-select IDs. 
nand-ecc-engine: allOf: @@ -184,7 +184,7 @@ examples: nand-use-soft-ecc-engine; nand-ecc-algo = "bch"; - /* controller specific properties */ + /* NAND chip specific properties */ }; nand@1 { diff --git a/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml b/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml index cb554084bdf11..0df4e114fdd69 100644 --- a/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml +++ b/Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml @@ -145,7 +145,7 @@ examples: clocks = <&sys_clk>; pinctrl-0 = <&sgpio2_pins>; pinctrl-names = "default"; - reg = <0x1101059c 0x100>; + reg = <0x1101059c 0x118>; microchip,sgpio-port-ranges = <0 0>, <16 18>, <28 31>; bus-frequency = <25000000>; sgpio_in2: gpio@0 { diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml index 328ea59c5466f..8299662c2c096 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml @@ -99,6 +99,14 @@ patternProperties: enum: [2, 4, 6, 8, 10, 12, 14, 16] bias-pull-down: + oneOf: + - type: boolean + - enum: [100, 101, 102, 103] + description: mt8195 pull down PUPD/R0/R1 type define value. + - enum: [200, 201, 202, 203, 204, 205, 206, 207] + description: mt8195 pull down RSEL type define value. + - enum: [75000, 5000] + description: mt8195 pull down RSEL type si unit value(ohm). description: | For pull down type is normal, it don't need add RSEL & R1R0 define and resistance value. @@ -115,13 +123,6 @@ patternProperties: & "MTK_PULL_SET_RSEL_110" & "MTK_PULL_SET_RSEL_111" define in mt8195. It can also support resistance value(ohm) "75000" & "5000" in mt8195. - oneOf: - - enum: [100, 101, 102, 103] - - description: mt8195 pull down PUPD/R0/R1 type define value. - - enum: [200, 201, 202, 203, 204, 205, 206, 207] - - description: mt8195 pull down RSEL type define value. - - enum: [75000, 5000] - - description: mt8195 pull down RSEL type si unit value(ohm). An example of using RSEL define: pincontroller { @@ -146,6 +147,14 @@ patternProperties: }; bias-pull-up: + oneOf: + - type: boolean + - enum: [100, 101, 102, 103] + description: mt8195 pull up PUPD/R0/R1 type define value. + - enum: [200, 201, 202, 203, 204, 205, 206, 207] + description: mt8195 pull up RSEL type define value. + - enum: [1000, 1500, 2000, 3000, 4000, 5000, 10000, 75000] + description: mt8195 pull up RSEL type si unit value(ohm). description: | For pull up type is normal, it don't need add RSEL & R1R0 define and resistance value. @@ -163,13 +172,6 @@ patternProperties: define in mt8195. It can also support resistance value(ohm) "1000" & "1500" & "2000" & "3000" & "4000" & "5000" & "10000" & "75000" in mt8195. - oneOf: - - enum: [100, 101, 102, 103] - - description: mt8195 pull up PUPD/R0/R1 type define value. - - enum: [200, 201, 202, 203, 204, 205, 206, 207] - - description: mt8195 pull up RSEL type define value. - - enum: [1000, 1500, 2000, 3000, 4000, 5000, 10000, 75000] - - description: mt8195 pull up RSEL type si unit value(ohm). 
An example of using RSEL define: pincontroller { i2c0-pins { diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml index 35a8045b2c70d..53627c6e2ae32 100644 --- a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml +++ b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml @@ -106,7 +106,7 @@ examples: dma-names = "rx", "tx"; flash@0 { - compatible = "spi-nor"; + compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <104000000>; spi-tx-bus-width = <2>; diff --git a/Documentation/devicetree/bindings/spi/spi-mxic.txt b/Documentation/devicetree/bindings/spi/spi-mxic.txt index 529f2dab2648a..7bcbb229b78bb 100644 --- a/Documentation/devicetree/bindings/spi/spi-mxic.txt +++ b/Documentation/devicetree/bindings/spi/spi-mxic.txt @@ -8,11 +8,13 @@ Required properties: - reg: should contain 2 entries, one for the registers and one for the direct mapping area - reg-names: should contain "regs" and "dirmap" -- interrupts: interrupt line connected to the SPI controller - clock-names: should contain "ps_clk", "send_clk" and "send_dly_clk" - clocks: should contain 3 entries for the "ps_clk", "send_clk" and "send_dly_clk" clocks +Optional properties: +- interrupts: interrupt line connected to the SPI controller + Example: spi@43c30000 { diff --git a/Documentation/devicetree/bindings/usb/usb-hcd.yaml b/Documentation/devicetree/bindings/usb/usb-hcd.yaml index 56853c17af667..1dc3d5d7b44fe 100644 --- a/Documentation/devicetree/bindings/usb/usb-hcd.yaml +++ b/Documentation/devicetree/bindings/usb/usb-hcd.yaml @@ -33,7 +33,7 @@ patternProperties: "^.*@[0-9a-f]{1,2}$": description: The hard wired USB devices type: object - $ref: /usb/usb-device.yaml + $ref: /schemas/usb/usb-device.yaml additionalProperties: true diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst index 3b8f41395f6b5..c8f7a16cd0e3c 100644 --- a/Documentation/driver-api/cxl/memory-devices.rst +++ b/Documentation/driver-api/cxl/memory-devices.rst @@ -36,10 +36,10 @@ CXL Core .. kernel-doc:: drivers/cxl/cxl.h :internal: -.. kernel-doc:: drivers/cxl/core/bus.c +.. kernel-doc:: drivers/cxl/core/port.c :doc: cxl core -.. kernel-doc:: drivers/cxl/core/bus.c +.. kernel-doc:: drivers/cxl/core/port.c :identifiers: .. kernel-doc:: drivers/cxl/core/pmem.c diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst new file mode 100644 index 0000000000000..e7e8f1640f457 --- /dev/null +++ b/Documentation/hwmon/asus_ec_sensors.rst @@ -0,0 +1,54 @@ +.. SPDX-License-Identifier: GPL-2.0-or-later + +Kernel driver asus_ec_sensors +================================= + +Supported boards: + * PRIME X570-PRO, + * Pro WS X570-ACE, + * ROG CROSSHAIR VIII DARK HERO, + * ROG CROSSHAIR VIII HERO (WI-FI) + * ROG CROSSHAIR VIII FORMULA, + * ROG CROSSHAIR VIII HERO, + * ROG CROSSHAIR VIII IMPACT, + * ROG STRIX B550-E GAMING, + * ROG STRIX B550-I GAMING, + * ROG STRIX X570-E GAMING, + * ROG STRIX X570-F GAMING, + * ROG STRIX X570-I GAMING + +Authors: + - Eugene Shalygin + +Description: +------------ +ASUS mainboards publish hardware monitoring information via Super I/O +chip and the ACPI embedded controller (EC) registers. Some of the sensors +are only available via the EC. + +The driver is aware of and reads the following sensors: + +1. Chipset (PCH) temperature +2. CPU package temperature +3. Motherboard temperature +4. Readings from the T_Sensor header +5. 
VRM temperature +6. CPU_Opt fan RPM +7. VRM heatsink fan RPM +8. Chipset fan RPM +9. Readings from the "Water flow meter" header (RPM) +10. Readings from the "Water In" and "Water Out" temperature headers +11. CPU current +12. CPU core voltage + +Sensor values are read from EC registers, and to avoid racing with the board +firmware the driver acquires the ACPI mutex that the WMI methods use when +they access the EC. + +Module Parameters +----------------- + * mutex_path: string + The driver holds the path to the ACPI mutex for each board (in practice, + the path is mostly identical across boards). If ASUS changes this path + in a future BIOS update, this parameter can be used to override + the value stored in the driver until the driver gets updated. diff --git a/Documentation/hwmon/asus_wmi_ec_sensors.rst b/Documentation/hwmon/asus_wmi_ec_sensors.rst deleted file mode 100644 index 1b287f229e86c..0000000000000 --- a/Documentation/hwmon/asus_wmi_ec_sensors.rst +++ /dev/null @@ -1,38 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0-or-later - -Kernel driver asus_wmi_ec_sensors -================================= - -Supported boards: - * PRIME X570-PRO, - * Pro WS X570-ACE, - * ROG CROSSHAIR VIII DARK HERO, - * ROG CROSSHAIR VIII FORMULA, - * ROG CROSSHAIR VIII HERO, - * ROG STRIX B550-E GAMING, - * ROG STRIX B550-I GAMING, - * ROG STRIX X570-E GAMING. - -Authors: - - Eugene Shalygin - -Description: ------------- -ASUS mainboards publish hardware monitoring information via Super I/O -chip and the ACPI embedded controller (EC) registers. Some of the sensors -are only available via the EC. - -ASUS WMI interface provides a method (BREC) to read data from EC registers, -which is utilized by this driver to publish those sensor readings to the -HWMON system. The driver is aware of and reads the following sensors: - -1. Chipset (PCH) temperature -2. CPU package temperature -3. Motherboard temperature -4. Readings from the T_Sensor header -5. VRM temperature -6. CPU_Opt fan RPM -7. Chipset fan RPM -8. Readings from the "Water flow meter" header (RPM) -9. Readings from the "Water In" and "Water Out" temperature headers -10. CPU current diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 003c865e9c212..fbcb48bc2a903 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -168,7 +168,16 @@ Trees - The finalized and tagged releases of all stable kernels can be found in separate branches per version at: - https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git + + - The release candidates of all stable kernel versions can be found at: + + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/ + + .. warning:: + The -stable-rc tree is a snapshot in time of the stable-queue tree and + will change frequently, hence it will be rebased often. It should only be + used for testing purposes (e.g. to be consumed by CI systems).
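For instance, a CI system could track a release candidate branch along these lines (an illustrative sketch only; the ``linux-5.17.y`` branch name is an assumption based on the usual stable branch naming, not part of this patch)::

    # Add the -stable-rc tree as a remote and fetch the current candidate.
    git remote add stable-rc https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
    git fetch stable-rc
    # The tree is rebased often, so check out a fresh detached copy each run.
    git checkout --detach stable-rc/linux-5.17.y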
Review committee diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst index d5fd6ccc3dcbd..b73eb764a0017 100644 --- a/Documentation/security/SCTP.rst +++ b/Documentation/security/SCTP.rst @@ -15,10 +15,7 @@ For security module support, three SCTP specific hooks have been implemented:: security_sctp_assoc_request() security_sctp_bind_connect() security_sctp_sk_clone() - -Also the following security hook has been utilised:: - - security_inet_conn_established() + security_sctp_assoc_established() The usage of these hooks are described below with the SELinux implementation described in the `SCTP SELinux Support`_ chapter. @@ -122,11 +119,12 @@ calls **sctp_peeloff**\(3). @newsk - pointer to new sock structure. -security_inet_conn_established() -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Called when a COOKIE ACK is received:: +security_sctp_assoc_established() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Called when a COOKIE ACK is received, and the peer secid will be +saved into ``@asoc->peer_secid`` for client:: - @sk - pointer to sock structure. + @asoc - pointer to sctp association structure. @skb - pointer to skbuff of the COOKIE ACK packet. @@ -134,7 +132,7 @@ Security Hooks used for Association Establishment ------------------------------------------------- The following diagram shows the use of ``security_sctp_bind_connect()``, -``security_sctp_assoc_request()``, ``security_inet_conn_established()`` when +``security_sctp_assoc_request()``, ``security_sctp_assoc_established()`` when establishing an association. :: @@ -172,7 +170,7 @@ establishing an association. <------------------------------------------- COOKIE ACK | | sctp_sf_do_5_1E_ca | - Call security_inet_conn_established() | + Call security_sctp_assoc_established() | to set the peer label. | | | | If SCTP_SOCKET_TCP or peeled off @@ -198,7 +196,7 @@ hooks with the SELinux specifics expanded below:: security_sctp_assoc_request() security_sctp_bind_connect() security_sctp_sk_clone() - security_inet_conn_established() + security_sctp_assoc_established() security_sctp_assoc_request() @@ -271,12 +269,12 @@ sockets sid and peer sid to that contained in the ``@asoc sid`` and @newsk - pointer to new sock structure. -security_inet_conn_established() -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +security_sctp_assoc_established() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Called when a COOKIE ACK is received where it sets the connection's peer sid to that in ``@skb``:: - @sk - pointer to sock structure. + @asoc - pointer to sctp association structure. @skb - pointer to skbuff of the COOKIE ACK packet. 
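To make the shape of the new hook concrete, here is a minimal sketch of how a security module might implement it, based only on the parameters documented above; the module name and the ``example_secid_from_skb()`` helper are hypothetical, and the ``int`` return type is an assumption matching the SELinux implementation this patch describes::

    static int example_sctp_assoc_established(struct sctp_association *asoc,
                                              struct sk_buff *skb)
    {
            /* Derive the peer secid from the COOKIE ACK skb (the helper is
             * hypothetical and module-specific) and save it for the client,
             * as described above for @asoc->peer_secid.
             */
            asoc->peer_secid = example_secid_from_skb(skb);
            return 0;
    }

    static struct security_hook_list example_hooks[] __lsm_ro_after_init = {
            LSM_HOOK_INIT(sctp_assoc_established, example_sctp_assoc_established),
    };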
diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst index d25335993e553..9b52f50a68542 100644 --- a/Documentation/sound/hd-audio/models.rst +++ b/Documentation/sound/hd-audio/models.rst @@ -261,6 +261,10 @@ alc-sense-combo huawei-mbx-stereo Enable initialization verbs for Huawei MBX stereo speakers; might be risky, try this at your own risk +alc298-samsung-headphone + Samsung laptops with ALC298 +alc256-samsung-headphone + Samsung laptops with ALC256 ALC66x/67x/892 ============== diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt index 9a35f50798a65..2c573541ab712 100644 --- a/Documentation/sphinx/requirements.txt +++ b/Documentation/sphinx/requirements.txt @@ -1,2 +1,4 @@ +# jinja2>=3.1 is not compatible with Sphinx<4.0 +jinja2<3.1 sphinx_rtd_theme Sphinx==2.4.4 diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst index 44365c4574a37..b484343002269 100644 --- a/Documentation/vm/index.rst +++ b/Documentation/vm/index.rst @@ -25,6 +25,7 @@ algorithms. If you are looking for advice on simply allocating memory, see the ksm memory-model mmu_notifier + multigen_lru numa overcommit-accounting page_migration diff --git a/Documentation/vm/multigen_lru.rst b/Documentation/vm/multigen_lru.rst new file mode 100644 index 0000000000000..cde60de16621b --- /dev/null +++ b/Documentation/vm/multigen_lru.rst @@ -0,0 +1,156 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +Multi-Gen LRU +============= + +Design overview +=============== +Objectives +---------- +The design objectives are: + +* Good representation of access recency +* Try to profit from spatial locality +* Fast paths to make obvious choices +* Simple self-correcting heuristics + +The representation of access recency is at the core of all LRU +implementations. In the multi-gen LRU, each generation represents a +group of pages with similar access recency. Generations establish a +common frame of reference and therefore help make better choices, +e.g., between different memcgs on a computer or different computers in +a data center (for job scheduling). + +Exploiting spatial locality improves efficiency when gathering the +accessed bit. A rmap walk targets a single page and does not try to +profit from discovering a young PTE. A page table walk can sweep all +the young PTEs in an address space, but the address space can be too +large to make a profit. The key is to optimize both methods and use +them in combination. + +Fast paths reduce code complexity and runtime overhead. Unmapped pages +do not require TLB flushes; clean pages do not require writeback. +These facts are only helpful when other conditions, e.g., access +recency, are similar. With generations as a common frame of reference, +additional factors stand out. But obvious choices might not be good +choices; thus self-correction is required. + +The benefits of simple self-correcting heuristics are self-evident. +Again, with generations as a common frame of reference, this becomes +attainable. Specifically, pages in the same generation can be +categorized based on additional factors, and a feedback loop can +statistically compare the refault percentages across those categories +and infer which of them are better choices. + +Assumptions +----------- +The protection of hot pages and the selection of cold pages are based +on page access channels and patterns. 
There are two access channels: + +* Accesses through page tables +* Accesses through file descriptors + +The protection of the former channel is by design stronger because: + +1. The uncertainty in determining the access patterns of the former + channel is higher due to the approximation of the accessed bit. +2. The cost of evicting the former channel is higher due to the TLB + flushes required and the likelihood of encountering the dirty bit. +3. The penalty of underprotecting the former channel is higher because + applications usually do not prepare themselves for major page + faults like they do for blocked I/O. E.g., GUI applications + commonly use dedicated I/O threads to avoid blocking the rendering + threads. + +There are also two access patterns: + +* Accesses exhibiting temporal locality +* Accesses not exhibiting temporal locality + +For the reasons listed above, the former channel is assumed to follow +the former pattern unless ``VM_SEQ_READ`` or ``VM_RAND_READ`` is +present, and the latter channel is assumed to follow the latter +pattern unless outlying refaults have been observed. + +Workflow overview +================= +Evictable pages are divided into multiple generations for each +``lruvec``. The youngest generation number is stored in +``lrugen->max_seq`` for both anon and file types as they are aged on +an equal footing. The oldest generation numbers are stored in +``lrugen->min_seq[]`` separately for anon and file types as clean file +pages can be evicted regardless of swap constraints. These three +variables are monotonically increasing. + +Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)`` +bits in order to fit into the gen counter in ``folio->flags``. Each +truncated generation number is an index to ``lrugen->lists[]``. The +sliding window technique is used to track at least ``MIN_NR_GENS`` and +at most ``MAX_NR_GENS`` generations. The gen counter stores a value +within ``[1, MAX_NR_GENS]`` while a page is on one of +``lrugen->lists[]``; otherwise it stores zero. + +Each generation is divided into multiple tiers. Tiers represent +different ranges of numbers of accesses through file descriptors. A +page accessed ``N`` times through file descriptors is in tier +``order_base_2(N)``. In contrast to moving across generations, which +requires the LRU lock, moving across tiers only requires operations on +``folio->flags`` and therefore has a negligible cost. A feedback loop +modeled after the PID controller monitors refaults over all the tiers +from anon and file types and decides which tiers from which types to +evict or protect. + +There are two conceptually independent procedures: the aging and the +eviction. They form a closed-loop system, i.e., the page reclaim. + +Aging +----- +The aging produces young generations. Given an ``lruvec``, it +increments ``max_seq`` when ``max_seq-min_seq+1`` approaches +``MIN_NR_GENS``. The aging promotes hot pages to the youngest +generation when it finds them accessed through page tables; the +demotion of cold pages happens consequently when it increments +``max_seq``. The aging uses page table walks and rmap walks to find +young PTEs. For the former, it iterates ``lruvec_memcg()->mm_list`` +and calls ``walk_page_range()`` with each ``mm_struct`` on this list +to scan PTEs. On finding a young PTE, it clears the accessed bit and +updates the gen counter of the page mapped by this PTE to +``(max_seq%MAX_NR_GENS)+1``. After each iteration of this list, it +increments ``max_seq``. 
For the latter, when the eviction walks the +rmap and finds a young PTE, the aging scans the adjacent PTEs and +follows the same steps just described. + +Eviction +-------- +The eviction consumes old generations. Given an ``lruvec``, it +increments ``min_seq`` when ``lrugen->lists[]`` indexed by +``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to +evict from, it first compares ``min_seq[]`` to select the older type. +If both types are equally old, it selects the one whose first tier has +a lower refault percentage. The first tier contains single-use +unmapped clean pages, which are the best bet. The eviction sorts a +page according to the gen counter if the aging has found this page +accessed through page tables and updated the gen counter. It also +moves a page to the next generation, i.e., ``min_seq+1``, if this page +was accessed multiple times through file descriptors and the feedback +loop has detected outlying refaults from the tier this page is in. To +do this, the feedback loop uses the first tier as the baseline, for +the reason stated earlier. + +Summary +------- +The multi-gen LRU can be disassembled into the following parts: + +* Generations +* Page table walks +* Rmap walks +* Bloom filters +* The PID controller + +The aging and the eviction form a producer-consumer model; specifically, +the latter drives the former by the sliding window over generations. +Within the aging, rmap walks drive page table walks by inserting hot, +densely populated page tables into the Bloom filters. Within the +eviction, the PID controller uses refaults as the feedback to select +types to evict and tiers to protect. diff --git a/MAINTAINERS b/MAINTAINERS index cd0f68d4a34a6..c79ea7056d472 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3056,6 +3056,12 @@ L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/asus_wmi_ec_sensors.c +ASUS EC HARDWARE MONITOR DRIVER +M: Eugene Shalygin +L: linux-hwmon@vger.kernel.org +S: Maintained +F: drivers/hwmon/asus-ec-sensors.c + ASUS WIRELESS RADIO CONTROL DRIVER M: João Paulo Rechi Vita L: platform-driver-x86@vger.kernel.org @@ -11113,6 +11119,13 @@ F: Documentation/litmus-tests/ F: Documentation/memory-barriers.txt F: tools/memory-model/ +LINUX RANDOM NUMBER GENERATOR (LRNG) DRIVER +M: Stephan Mueller +S: Maintained +W: https://www.chronox.de/lrng.html +F: drivers/char/lrng/ +F: include/linux/lrng.h + LIS3LV02D ACCELEROMETER DRIVER M: Eric Piel S: Maintained @@ -16373,8 +16386,7 @@ M: Linus Walleij M: Alvin Šipraga S: Maintained F: Documentation/devicetree/bindings/net/dsa/realtek-smi.txt -F: drivers/net/dsa/realtek-smi* -F: drivers/net/dsa/rtl83* +F: drivers/net/dsa/realtek/* REALTEK WIRELESS DRIVER (rtlwifi family) M: Ping-Ke Shih diff --git a/Makefile b/Makefile index 7214f075e1f06..1fdd609219d0b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -pf2 NAME = Superb Owl # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index 678a80713b213..b682d6bf2eb87 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1162,6 +1162,7 @@ config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET config RANDOMIZE_KSTACK_OFFSET_DEFAULT bool "Randomize kernel stack offset on syscall entry" depends on HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET + depends on INIT_STACK_NONE || !CC_IS_CLANG || CLANG_VERSION >= 140000 help The kernel stack offset can be randomized (after pt_regs) by roughly 5 bits of entropy, frustrating memory corruption @@ -1322,6 +1323,15 @@ config DYNAMIC_SIGFRAME config
HAVE_ARCH_NODE_DEV_GROUP bool +config ARCH_HAS_NONLEAF_PMD_YOUNG + bool + depends on PGTABLE_LEVELS > 2 + help + Architectures that select this option are capable of setting the + accessed bit in non-leaf PMD entries when using them as part of linear + address translations. Page table walkers that clear the accessed bit + may use this capability to reduce their search space. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 3515bc4f16a4f..00ff721da300e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -490,3 +490,4 @@ 558 common process_mrelease sys_process_mrelease 559 common futex_waitv sys_futex_waitv 560 common set_mempolicy_home_node sys_ni_syscall +561 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 8e90052f6f056..5f7f5aab361f1 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -43,7 +43,7 @@ SYSCALL_DEFINE0(arc_gettls) return task_thread_info(current)->thr_ptr; } -SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) +SYSCALL_DEFINE3(arc_usr_cmpxchg, int __user *, uaddr, int, expected, int, new) { struct pt_regs *regs = current_pt_regs(); u32 uval; diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index 21294f775a20f..89af57482bc8f 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -459,12 +459,26 @@ #size-cells = <0>; enable-method = "brcm,bcm2836-smp"; // for ARM 32-bit + /* Source for d/i-cache-line-size and d/i-cache-sets + * https://developer.arm.com/documentation/100095/0003 + * /Level-1-Memory-System/About-the-L1-memory-system?lang=en + * Source for d/i-cache-size + * https://www.raspberrypi.com/documentation/computers + * /processors.html#bcm2711 + */ cpu0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a72"; reg = <0>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000d8>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + i-cache-size = <0xc000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set + next-level-cache = <&l2>; }; cpu1: cpu@1 { @@ -473,6 +487,13 @@ reg = <1>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + i-cache-size = <0xc000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set + next-level-cache = <&l2>; }; cpu2: cpu@2 { @@ -481,6 +502,13 @@ reg = <2>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e8>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + i-cache-size = <0xc000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set + next-level-cache = <&l2>; }; cpu3: cpu@3 { @@ -489,6 +517,28 @@ reg = <3>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000f0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + i-cache-size = <0xc000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 48KiB(size)/64(line-size)=768ways/3-way set + next-level-cache = <&l2>; + }; + + /* Source for 
d/i-cache-line-size and d/i-cache-sets + * https://developer.arm.com/documentation/100095/0003 + * /Level-2-Memory-System/About-the-L2-memory-system?lang=en + * Source for d/i-cache-size + * https://www.raspberrypi.com/documentation/computers + * /processors.html#bcm2711 + */ + l2: l2-cache0 { + compatible = "cache"; + cache-size = <0x100000>; + cache-line-size = <64>; + cache-sets = <1024>; // 1MiB(size)/64(line-size)=16384ways/16-way set + cache-level = <2>; }; }; diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi index 0199ec98cd616..5dbdebc462594 100644 --- a/arch/arm/boot/dts/bcm2837.dtsi +++ b/arch/arm/boot/dts/bcm2837.dtsi @@ -40,12 +40,26 @@ #size-cells = <0>; enable-method = "brcm,bcm2836-smp"; // for ARM 32-bit + /* Source for d/i-cache-line-size and d/i-cache-sets + * https://developer.arm.com/documentation/ddi0500/e/level-1-memory-system + * /about-the-l1-memory-system?lang=en + * + * Source for d/i-cache-size + * https://magpi.raspberrypi.com/articles/raspberry-pi-3-specs-benchmarks + */ cpu0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a53"; reg = <0>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000d8>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu1: cpu@1 { @@ -54,6 +68,13 @@ reg = <1>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu2: cpu@2 { @@ -62,6 +83,13 @@ reg = <2>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e8>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu3: cpu@3 { @@ -70,6 +98,27 @@ reg = <3>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000f0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; + }; + + /* Source for cache-line-size + cache-sets + * https://developer.arm.com/documentation/ddi0500 + * /e/level-2-memory-system/about-the-l2-memory-system?lang=en + * Source for cache-size + * https://datasheets.raspberrypi.com/cm/cm1-and-cm3-datasheet.pdf + */ + l2: l2-cache0 { + compatible = "cache"; + cache-size = <0x80000>; + cache-line-size = <64>; + cache-sets = <512>; // 512KiB(size)/64(line-size)=8192ways/16-way set + cache-level = <2>; }; }; }; diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 956a26d52a4c3..0a11bacffc1f1 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -3482,8 +3482,7 @@ ti,timer-pwm; }; }; - - target-module@2c000 { /* 0x4882c000, ap 17 02.0 */ + timer15_target: target-module@2c000 { /* 0x4882c000, ap 17 02.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; reg = <0x2c000 0x4>, <0x2c010 0x4>; 
@@ -3511,7 +3510,7 @@ }; }; - target-module@2e000 { /* 0x4882e000, ap 19 14.0 */ + timer16_target: target-module@2e000 { /* 0x4882e000, ap 19 14.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; reg = <0x2e000 0x4>, <0x2e010 0x4>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 42bff117656cf..97ce0c4f1df7e 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1339,20 +1339,20 @@ }; /* Local timers, see ARM architected timer wrap erratum i940 */ -&timer3_target { +&timer15_target { ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>; + assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>; assigned-clock-parents = <&timer_sys_clk_div>; }; }; -&timer4_target { +&timer16_target { ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>; + assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>; assigned-clock-parents = <&timer_sys_clk_div>; }; }; diff --git a/arch/arm/boot/dts/exynos5250-pinctrl.dtsi b/arch/arm/boot/dts/exynos5250-pinctrl.dtsi index d31a68672bfac..d7d756614edd1 100644 --- a/arch/arm/boot/dts/exynos5250-pinctrl.dtsi +++ b/arch/arm/boot/dts/exynos5250-pinctrl.dtsi @@ -260,7 +260,7 @@ }; uart3_data: uart3-data { - samsung,pins = "gpa1-4", "gpa1-4"; + samsung,pins = "gpa1-4", "gpa1-5"; samsung,pin-function = ; samsung,pin-pud = ; samsung,pin-drv = ; diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts index 39bbe18145cf2..f042954bdfa5d 100644 --- a/arch/arm/boot/dts/exynos5250-smdk5250.dts +++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts @@ -118,6 +118,9 @@ status = "okay"; ddc = <&i2c_2>; hpd-gpios = <&gpx3 7 GPIO_ACTIVE_HIGH>; + vdd-supply = <&ldo8_reg>; + vdd_osc-supply = <&ldo10_reg>; + vdd_pll-supply = <&ldo8_reg>; }; &i2c_0 { diff --git a/arch/arm/boot/dts/exynos5420-smdk5420.dts b/arch/arm/boot/dts/exynos5420-smdk5420.dts index a4f0e3ffedbd3..07f65213aae65 100644 --- a/arch/arm/boot/dts/exynos5420-smdk5420.dts +++ b/arch/arm/boot/dts/exynos5420-smdk5420.dts @@ -124,6 +124,9 @@ hpd-gpios = <&gpx3 7 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&hdmi_hpd_irq>; + vdd-supply = <&ldo6_reg>; + vdd_osc-supply = <&ldo7_reg>; + vdd_pll-supply = <&ldo6_reg>; }; &hsi2c_4 { diff --git a/arch/arm/boot/dts/imx53-m53menlo.dts b/arch/arm/boot/dts/imx53-m53menlo.dts index 4f88e96d81ddb..d5c68d1ea707c 100644 --- a/arch/arm/boot/dts/imx53-m53menlo.dts +++ b/arch/arm/boot/dts/imx53-m53menlo.dts @@ -53,6 +53,31 @@ }; }; + lvds-decoder { + compatible = "ti,ds90cf364a", "lvds-decoder"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + lvds_decoder_in: endpoint { + remote-endpoint = <&lvds0_out>; + }; + }; + + port@1 { + reg = <1>; + + lvds_decoder_out: endpoint { + remote-endpoint = <&panel_in>; + }; + }; + }; + }; + panel { compatible = "edt,etm0700g0dh6"; pinctrl-0 = <&pinctrl_display_gpio>; @@ -61,7 +86,7 @@ port { panel_in: endpoint { - remote-endpoint = <&lvds0_out>; + remote-endpoint = <&lvds_decoder_out>; }; }; }; @@ -450,7 +475,7 @@ reg = <2>; lvds0_out: endpoint { - remote-endpoint = <&panel_in>; + remote-endpoint = <&lvds_decoder_in>; }; }; }; diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi index 62b771c1d5a9a..f1c60b0cb143e 100644 --- a/arch/arm/boot/dts/imx7-colibri.dtsi +++ b/arch/arm/boot/dts/imx7-colibri.dtsi @@ -40,7 +40,7 @@ dailink_master: 
simple-audio-card,codec { sound-dai = <&codec>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -293,7 +293,7 @@ compatible = "fsl,sgtl5000"; #sound-dai-cells = <0>; reg = <0x0a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai1_mclk>; VDDA-supply = <®_module_3v3_avdd>; diff --git a/arch/arm/boot/dts/imx7-mba7.dtsi b/arch/arm/boot/dts/imx7-mba7.dtsi index 49086c6b6a0a2..3df6dff7734ae 100644 --- a/arch/arm/boot/dts/imx7-mba7.dtsi +++ b/arch/arm/boot/dts/imx7-mba7.dtsi @@ -302,7 +302,7 @@ tlv320aic32x4: audio-codec@18 { compatible = "ti,tlv320aic32x4"; reg = <0x18>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; ldoin-supply = <®_audio_3v3>; iov-supply = <®_audio_3v3>; diff --git a/arch/arm/boot/dts/imx7d-nitrogen7.dts b/arch/arm/boot/dts/imx7d-nitrogen7.dts index e0751e6ba3c0f..a31de900139d6 100644 --- a/arch/arm/boot/dts/imx7d-nitrogen7.dts +++ b/arch/arm/boot/dts/imx7d-nitrogen7.dts @@ -288,7 +288,7 @@ codec: wm8960@1a { compatible = "wlf,wm8960"; reg = <0x1a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; wlf,shared-lrclk; }; diff --git a/arch/arm/boot/dts/imx7d-pico-hobbit.dts b/arch/arm/boot/dts/imx7d-pico-hobbit.dts index 7b2198a9372c6..d917dc4f2f227 100644 --- a/arch/arm/boot/dts/imx7d-pico-hobbit.dts +++ b/arch/arm/boot/dts/imx7d-pico-hobbit.dts @@ -31,7 +31,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&sgtl5000>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -41,7 +41,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; VDDA-supply = <®_2p5v>; VDDIO-supply = <®_vref_1v8>; }; diff --git a/arch/arm/boot/dts/imx7d-pico-pi.dts b/arch/arm/boot/dts/imx7d-pico-pi.dts index 70bea95c06d83..f263e391e24cb 100644 --- a/arch/arm/boot/dts/imx7d-pico-pi.dts +++ b/arch/arm/boot/dts/imx7d-pico-pi.dts @@ -31,7 +31,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&sgtl5000>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -41,7 +41,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; VDDA-supply = <®_2p5v>; VDDIO-supply = <®_vref_1v8>; }; diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 7813ef960f6ee..f053f51227417 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -385,14 +385,14 @@ codec: wm8960@1a { compatible = "wlf,wm8960"; reg = <0x1a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; wlf,shared-lrclk; wlf,hp-cfg = <2 2 3>; wlf,gpio-cfg = <1 3>; assigned-clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_SRC>, <&clks IMX7D_PLL_AUDIO_POST_DIV>, - <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; assigned-clock-parents = <&clks IMX7D_PLL_AUDIO_POST_DIV>; assigned-clock-rates = <0>, <884736000>, <12288000>; }; diff --git a/arch/arm/boot/dts/imx7s-warp.dts b/arch/arm/boot/dts/imx7s-warp.dts index 4f1edef06c922..e8734d218b9de 100644 --- a/arch/arm/boot/dts/imx7s-warp.dts +++ b/arch/arm/boot/dts/imx7s-warp.dts @@ -75,7 
+75,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&codec>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -232,7 +232,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai1_mclk>; VDDA-supply = <&vgen4_reg>; diff --git a/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi b/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi index 31f59de5190b8..7af41361c4800 100644 --- a/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi +++ b/arch/arm/boot/dts/openbmc-flash-layout-64.dtsi @@ -28,7 +28,7 @@ partitions { label = "rofs"; }; - rwfs@6000000 { + rwfs@2a00000 { reg = <0x2a00000 0x1600000>; // 22MB label = "rwfs"; }; diff --git a/arch/arm/boot/dts/openbmc-flash-layout.dtsi b/arch/arm/boot/dts/openbmc-flash-layout.dtsi index 6c26524e93e11..b47e14063c380 100644 --- a/arch/arm/boot/dts/openbmc-flash-layout.dtsi +++ b/arch/arm/boot/dts/openbmc-flash-layout.dtsi @@ -20,7 +20,7 @@ partitions { label = "kernel"; }; - rofs@c0000 { + rofs@4c0000 { reg = <0x4c0000 0x1740000>; label = "rofs"; }; diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index 7dec0553636e5..51c365fdf3bfd 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -142,7 +142,8 @@ clocks { sleep_clk: sleep_clk { compatible = "fixed-clock"; - clock-frequency = <32768>; + clock-frequency = <32000>; + clock-output-names = "gcc_sleep_clk_src"; #clock-cells = <0>; }; diff --git a/arch/arm/boot/dts/qcom-msm8960.dtsi b/arch/arm/boot/dts/qcom-msm8960.dtsi index 2a0ec97a264f2..a0f9ab7f08f34 100644 --- a/arch/arm/boot/dts/qcom-msm8960.dtsi +++ b/arch/arm/boot/dts/qcom-msm8960.dtsi @@ -146,7 +146,9 @@ reg = <0x108000 0x1000>; qcom,ipc = <&l2cc 0x8 2>; - interrupts = <0 19 0>, <0 21 0>, <0 22 0>; + interrupts = , + , + ; interrupt-names = "ack", "err", "wakeup"; regulators { @@ -192,7 +194,7 @@ compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm"; reg = <0x16440000 0x1000>, <0x16400000 0x1000>; - interrupts = <0 154 0x0>; + interrupts = ; clocks = <&gcc GSBI5_UART_CLK>, <&gcc GSBI5_H_CLK>; clock-names = "core", "iface"; status = "disabled"; @@ -318,7 +320,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x16080000 0x1000>; - interrupts = <0 147 0>; + interrupts = ; spi-max-frequency = <24000000>; cs-gpios = <&msmgpio 8 0>; diff --git a/arch/arm/boot/dts/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom-sdx55.dtsi index 8ac0492c76595..40f11159f061e 100644 --- a/arch/arm/boot/dts/qcom-sdx55.dtsi +++ b/arch/arm/boot/dts/qcom-sdx55.dtsi @@ -413,7 +413,7 @@ <0x40000000 0xf1d>, <0x40000f20 0xc8>, <0x40001000 0x1000>, - <0x40002000 0x10000>, + <0x40200000 0x100000>, <0x01c03000 0x3000>; reg-names = "parf", "dbi", "elbi", "atu", "addr_space", "mmio"; diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 09c741e8ecb87..c700c3b19e4c4 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -415,7 +415,7 @@ pmecc: ecc-engine@f8014070 { compatible = "atmel,sama5d2-pmecc"; reg = <0xf8014070 0x490>, - <0xf8014500 0x100>; + <0xf8014500 0x200>; }; }; diff --git a/arch/arm/boot/dts/sama7g5.dtsi b/arch/arm/boot/dts/sama7g5.dtsi index eddcfbf4d2233..22520cdd37fc5 100644 --- a/arch/arm/boot/dts/sama7g5.dtsi +++ b/arch/arm/boot/dts/sama7g5.dtsi @@ -382,8 +382,6 @@ dmas = <&dma0 AT91_XDMAC_DT_PERID(7)>, <&dma0 
AT91_XDMAC_DT_PERID(8)>; dma-names = "rx", "tx"; - atmel,use-dma-rx; - atmel,use-dma-tx; status = "disabled"; }; }; @@ -558,8 +556,6 @@ dmas = <&dma0 AT91_XDMAC_DT_PERID(21)>, <&dma0 AT91_XDMAC_DT_PERID(22)>; dma-names = "rx", "tx"; - atmel,use-dma-rx; - atmel,use-dma-tx; status = "disabled"; }; }; @@ -584,8 +580,6 @@ dmas = <&dma0 AT91_XDMAC_DT_PERID(23)>, <&dma0 AT91_XDMAC_DT_PERID(24)>; dma-names = "rx", "tx"; - atmel,use-dma-rx; - atmel,use-dma-tx; status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi index 827e887afbda4..13e1bdb3ddbf1 100644 --- a/arch/arm/boot/dts/spear1340.dtsi +++ b/arch/arm/boot/dts/spear1340.dtsi @@ -134,9 +134,9 @@ reg = <0xb4100000 0x1000>; interrupts = <0 105 0x4>; status = "disabled"; - dmas = <&dwdma0 12 0 1>, - <&dwdma0 13 1 0>; - dma-names = "tx", "rx"; + dmas = <&dwdma0 13 0 1>, + <&dwdma0 12 1 0>; + dma-names = "rx", "tx"; }; thermal@e07008c4 { diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index c87b881b2c8bb..9135533676879 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -284,9 +284,9 @@ #size-cells = <0>; interrupts = <0 31 0x4>; status = "disabled"; - dmas = <&dwdma0 4 0 0>, - <&dwdma0 5 0 0>; - dma-names = "tx", "rx"; + dmas = <&dwdma0 5 0 0>, + <&dwdma0 4 0 0>; + dma-names = "rx", "tx"; }; rtc@e0580000 { diff --git a/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi index 3b65130affec8..6161f5906ec11 100644 --- a/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi +++ b/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi @@ -1190,7 +1190,7 @@ }; }; - sai2a_sleep_pins_c: sai2a-2 { + sai2a_sleep_pins_c: sai2a-sleep-2 { pins { pinmux = , /* SAI2_SCK_A */ , /* SAI2_SD_A */ diff --git a/arch/arm/boot/dts/sun8i-v3s.dtsi b/arch/arm/boot/dts/sun8i-v3s.dtsi index b30bc1a25ebb9..084323d5c61cb 100644 --- a/arch/arm/boot/dts/sun8i-v3s.dtsi +++ b/arch/arm/boot/dts/sun8i-v3s.dtsi @@ -593,6 +593,17 @@ #size-cells = <0>; }; + gic: interrupt-controller@1c81000 { + compatible = "arm,gic-400"; + reg = <0x01c81000 0x1000>, + <0x01c82000 0x2000>, + <0x01c84000 0x2000>, + <0x01c86000 0x2000>; + interrupt-controller; + #interrupt-cells = <3>; + interrupts = ; + }; + csi1: camera@1cb4000 { compatible = "allwinner,sun8i-v3s-csi"; reg = <0x01cb4000 0x3000>; @@ -604,16 +615,5 @@ resets = <&ccu RST_BUS_CSI>; status = "disabled"; }; - - gic: interrupt-controller@1c81000 { - compatible = "arm,gic-400"; - reg = <0x01c81000 0x1000>, - <0x01c82000 0x2000>, - <0x01c84000 0x2000>, - <0x01c86000 0x2000>; - interrupt-controller; - #interrupt-cells = <3>; - interrupts = ; - }; }; }; diff --git a/arch/arm/boot/dts/tegra20-asus-tf101.dts b/arch/arm/boot/dts/tegra20-asus-tf101.dts index 020172ee7340e..e3267cda15cc9 100644 --- a/arch/arm/boot/dts/tegra20-asus-tf101.dts +++ b/arch/arm/boot/dts/tegra20-asus-tf101.dts @@ -442,11 +442,13 @@ serial@70006040 { compatible = "nvidia,tegra20-hsuart"; + /delete-property/ reg-shift; /* GPS BCM4751 */ }; serial@70006200 { compatible = "nvidia,tegra20-hsuart"; + /delete-property/ reg-shift; status = "okay"; /* Azurewave AW-NH615 BCM4329B1 */ diff --git a/arch/arm/boot/dts/tegra20-tamonten.dtsi b/arch/arm/boot/dts/tegra20-tamonten.dtsi index de39c5465c0a9..0e19bd0a847c8 100644 --- a/arch/arm/boot/dts/tegra20-tamonten.dtsi +++ b/arch/arm/boot/dts/tegra20-tamonten.dtsi @@ -183,8 +183,8 @@ }; conf_ata { nvidia,pins = "ata", "atb", "atc", "atd", "ate", - "cdev1", "cdev2", "dap1", "dtb", "gma", - "gmb", "gmc", "gmd", 
"gme", "gpu7", + "cdev1", "cdev2", "dap1", "dtb", "dtf", + "gma", "gmb", "gmc", "gmd", "gme", "gpu7", "gpv", "i2cp", "irrx", "irtx", "pta", "rm", "slxa", "slxk", "spia", "spib", "uac"; @@ -203,7 +203,7 @@ }; conf_crtp { nvidia,pins = "crtp", "dap2", "dap3", "dap4", - "dtc", "dte", "dtf", "gpu", "sdio1", + "dtc", "dte", "gpu", "sdio1", "slxc", "slxd", "spdi", "spdo", "spig", "uda"; nvidia,pull = ; diff --git a/arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi b/arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi index 85b43a86a26d9..c662ab261ed5f 100644 --- a/arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi +++ b/arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi @@ -1080,6 +1080,7 @@ serial@70006040 { compatible = "nvidia,tegra30-hsuart"; + /delete-property/ reg-shift; status = "okay"; /* Broadcom GPS BCM47511 */ @@ -1087,6 +1088,7 @@ serial@70006200 { compatible = "nvidia,tegra30-hsuart"; + /delete-property/ reg-shift; status = "okay"; nvidia,adjust-baud-rates = <0 9600 100>, diff --git a/arch/arm/boot/dts/tegra30-pegatron-chagall.dts b/arch/arm/boot/dts/tegra30-pegatron-chagall.dts index f4b2d4218849c..8ce61035290b5 100644 --- a/arch/arm/boot/dts/tegra30-pegatron-chagall.dts +++ b/arch/arm/boot/dts/tegra30-pegatron-chagall.dts @@ -1103,6 +1103,7 @@ uartb: serial@70006040 { compatible = "nvidia,tegra30-hsuart"; + /delete-property/ reg-shift; status = "okay"; /* Broadcom GPS BCM47511 */ @@ -1110,6 +1111,7 @@ uartc: serial@70006200 { compatible = "nvidia,tegra30-hsuart"; + /delete-property/ reg-shift; status = "okay"; nvidia,adjust-baud-rates = <0 9600 100>, diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index fe8d760256a4c..3e3beb0cc33de 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -188,6 +188,7 @@ CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_MEDIA_SUPPORT=y CONFIG_MEDIA_CAMERA_SUPPORT=y +CONFIG_MEDIA_PLATFORM_SUPPORT=y CONFIG_V4L_PLATFORM_DRIVERS=y CONFIG_VIDEO_ASPEED=m CONFIG_VIDEO_ATMEL_ISI=m @@ -196,6 +197,7 @@ CONFIG_DRM_ATMEL_HLCDC=m CONFIG_DRM_PANEL_SIMPLE=y CONFIG_DRM_PANEL_EDP=y CONFIG_DRM_ASPEED_GFX=m +CONFIG_FB=y CONFIG_FB_IMX=y CONFIG_FB_ATMEL=y CONFIG_BACKLIGHT_ATMEL_LCDC=y diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 2b575792363e5..e4dba5461cb3e 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -102,6 +102,8 @@ config CRYPTO_AES_ARM_BS depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_LIB_AES + select CRYPTO_AES + select CRYPTO_CBC select CRYPTO_SIMD help Use a faster and more secure NEON based implementation of AES in CBC, diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S index a74289ebc8036..5f1b1ce10473a 100644 --- a/arch/arm/kernel/entry-ftrace.S +++ b/arch/arm/kernel/entry-ftrace.S @@ -22,10 +22,7 @@ * mcount can be thought of as a function called in the middle of a subroutine * call. As such, it needs to be transparent for both the caller and the * callee: the original lr needs to be restored when leaving mcount, and no - * registers should be clobbered. (In the __gnu_mcount_nc implementation, we - * clobber the ip register. This is OK because the ARM calling convention - * allows it to be clobbered in subroutines and doesn't use it to hold - * parameters.) + * registers should be clobbered. * * When using dynamic ftrace, we patch out the mcount call by a "pop {lr}" * instead of the __gnu_mcount_nc call (see arch/arm/kernel/ftrace.c). 
@@ -70,26 +67,25 @@ .macro __ftrace_regs_caller - sub sp, sp, #8 @ space for PC and CPSR OLD_R0, + str lr, [sp, #-8]! @ store LR as PC and make space for CPSR/OLD_R0, @ OLD_R0 will overwrite previous LR - add ip, sp, #12 @ move in IP the value of SP as it was - @ before the push {lr} of the mcount mechanism + ldr lr, [sp, #8] @ get previous LR - str lr, [sp, #0] @ store LR instead of PC + str r0, [sp, #8] @ write r0 as OLD_R0 over previous LR - ldr lr, [sp, #8] @ get previous LR + str lr, [sp, #-4]! @ store previous LR as LR - str r0, [sp, #8] @ write r0 as OLD_R0 over previous LR + add lr, sp, #16 @ move in LR the value of SP as it was + @ before the push {lr} of the mcount mechanism - stmdb sp!, {ip, lr} - stmdb sp!, {r0-r11, lr} + push {r0-r11, ip, lr} @ stack content at this point: @ 0 4 48 52 56 60 64 68 72 - @ R0 | R1 | ... | LR | SP + 4 | previous LR | LR | PSR | OLD_R0 | + @ R0 | R1 | ... | IP | SP + 4 | previous LR | LR | PSR | OLD_R0 | - mov r3, sp @ struct pt_regs* + mov r3, sp @ struct pt_regs* ldr r2, =function_trace_op ldr r2, [r2] @ pointer to the current @@ -112,11 +108,9 @@ ftrace_graph_regs_call: #endif @ pop saved regs - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return + pop {r0-r11, ip, lr} @ restore r0 through r12 + ldr lr, [sp], #4 @ restore LR + ldr pc, [sp], #12 .endm #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -132,11 +126,9 @@ ftrace_graph_regs_call: bl prepare_ftrace_return @ pop registers saved in ftrace_regs_caller - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return + pop {r0-r11, ip, lr} @ restore r0 through r12 + ldr lr, [sp], #4 @ restore LR + ldr pc, [sp], #12 .endm #endif @@ -202,16 +194,17 @@ ftrace_graph_call\suffix: .endm .macro mcount_exit - ldmia sp!, {r0-r3, ip, lr} - ret ip + ldmia sp!, {r0-r3} + ldr lr, [sp, #4] + ldr pc, [sp], #8 .endm ENTRY(__gnu_mcount_nc) UNWIND(.fnstart) #ifdef CONFIG_DYNAMIC_FTRACE - mov ip, lr - ldmia sp!, {lr} - ret ip + push {lr} + ldr lr, [sp, #4] + ldr pc, [sp], #8 #else __mcount #endif diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 6166ba38bf994..b74bfcf94fb1a 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -195,7 +195,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr) destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data); /* Check access in reasonable access range for both SWP and SWPB */ - if (!access_ok((address & ~3), 4)) { + if (!access_ok((void __user *)(address & ~3), 4)) { pr_debug("SWP{B} emulation: access to %p not allowed!\n", (void *)address); res = -EFAULT; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index cae4a748811f8..5d58aee24087f 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -577,7 +577,7 @@ do_cache_op(unsigned long start, unsigned long end, int flags) if (end < start || flags) return -EINVAL; - if (!access_ok(start, end - start)) + if (!access_ok((void __user *)start, end - start)) return -EFAULT; return __do_cache_op(start, end); diff --git a/arch/arm/mach-iop32x/include/mach/entry-macro.S b/arch/arm/mach-iop32x/include/mach/entry-macro.S index 8e6766d4621eb..341e5d9a6616d 100644 --- a/arch/arm/mach-iop32x/include/mach/entry-macro.S +++ b/arch/arm/mach-iop32x/include/mach/entry-macro.S @@ -20,7 +20,7 @@ mrc p6, 0, \irqstat, c8, c0, 0 @ Read IINTSRC cmp 
\irqstat, #0 clzne \irqnr, \irqstat - rsbne \irqnr, \irqnr, #31 + rsbne \irqnr, \irqnr, #32 .endm .macro arch_ret_to_user, tmp1, tmp2 diff --git a/arch/arm/mach-iop32x/include/mach/irqs.h b/arch/arm/mach-iop32x/include/mach/irqs.h index c4e78df428e86..e09ae5f48aec5 100644 --- a/arch/arm/mach-iop32x/include/mach/irqs.h +++ b/arch/arm/mach-iop32x/include/mach/irqs.h @@ -9,6 +9,6 @@ #ifndef __IRQS_H #define __IRQS_H -#define NR_IRQS 32 +#define NR_IRQS 33 #endif diff --git a/arch/arm/mach-iop32x/irq.c b/arch/arm/mach-iop32x/irq.c index 2d48bf1398c10..d1e8824cbd824 100644 --- a/arch/arm/mach-iop32x/irq.c +++ b/arch/arm/mach-iop32x/irq.c @@ -32,14 +32,14 @@ static void intstr_write(u32 val) static void iop32x_irq_mask(struct irq_data *d) { - iop32x_mask &= ~(1 << d->irq); + iop32x_mask &= ~(1 << (d->irq - 1)); intctl_write(iop32x_mask); } static void iop32x_irq_unmask(struct irq_data *d) { - iop32x_mask |= 1 << d->irq; + iop32x_mask |= 1 << (d->irq - 1); intctl_write(iop32x_mask); } @@ -65,7 +65,7 @@ void __init iop32x_init_irq(void) machine_is_em7210()) *IOP3XX_PCIIRSR = 0x0f; - for (i = 0; i < NR_IRQS; i++) { + for (i = 1; i < NR_IRQS; i++) { irq_set_chip_and_handler(i, &ext_chip, handle_level_irq); irq_clear_status_flags(i, IRQ_NOREQUEST | IRQ_NOPROBE); } diff --git a/arch/arm/mach-iop32x/irqs.h b/arch/arm/mach-iop32x/irqs.h index 69858e4e905d1..e1dfc8b4e7d7e 100644 --- a/arch/arm/mach-iop32x/irqs.h +++ b/arch/arm/mach-iop32x/irqs.h @@ -7,36 +7,40 @@ #ifndef __IOP32X_IRQS_H #define __IOP32X_IRQS_H +/* Interrupts in Linux start at 1, hardware starts at 0 */ + +#define IOP_IRQ(x) ((x) + 1) + /* * IOP80321 chipset interrupts */ -#define IRQ_IOP32X_DMA0_EOT 0 -#define IRQ_IOP32X_DMA0_EOC 1 -#define IRQ_IOP32X_DMA1_EOT 2 -#define IRQ_IOP32X_DMA1_EOC 3 -#define IRQ_IOP32X_AA_EOT 6 -#define IRQ_IOP32X_AA_EOC 7 -#define IRQ_IOP32X_CORE_PMON 8 -#define IRQ_IOP32X_TIMER0 9 -#define IRQ_IOP32X_TIMER1 10 -#define IRQ_IOP32X_I2C_0 11 -#define IRQ_IOP32X_I2C_1 12 -#define IRQ_IOP32X_MESSAGING 13 -#define IRQ_IOP32X_ATU_BIST 14 -#define IRQ_IOP32X_PERFMON 15 -#define IRQ_IOP32X_CORE_PMU 16 -#define IRQ_IOP32X_BIU_ERR 17 -#define IRQ_IOP32X_ATU_ERR 18 -#define IRQ_IOP32X_MCU_ERR 19 -#define IRQ_IOP32X_DMA0_ERR 20 -#define IRQ_IOP32X_DMA1_ERR 21 -#define IRQ_IOP32X_AA_ERR 23 -#define IRQ_IOP32X_MSG_ERR 24 -#define IRQ_IOP32X_SSP 25 -#define IRQ_IOP32X_XINT0 27 -#define IRQ_IOP32X_XINT1 28 -#define IRQ_IOP32X_XINT2 29 -#define IRQ_IOP32X_XINT3 30 -#define IRQ_IOP32X_HPI 31 +#define IRQ_IOP32X_DMA0_EOT IOP_IRQ(0) +#define IRQ_IOP32X_DMA0_EOC IOP_IRQ(1) +#define IRQ_IOP32X_DMA1_EOT IOP_IRQ(2) +#define IRQ_IOP32X_DMA1_EOC IOP_IRQ(3) +#define IRQ_IOP32X_AA_EOT IOP_IRQ(6) +#define IRQ_IOP32X_AA_EOC IOP_IRQ(7) +#define IRQ_IOP32X_CORE_PMON IOP_IRQ(8) +#define IRQ_IOP32X_TIMER0 IOP_IRQ(9) +#define IRQ_IOP32X_TIMER1 IOP_IRQ(10) +#define IRQ_IOP32X_I2C_0 IOP_IRQ(11) +#define IRQ_IOP32X_I2C_1 IOP_IRQ(12) +#define IRQ_IOP32X_MESSAGING IOP_IRQ(13) +#define IRQ_IOP32X_ATU_BIST IOP_IRQ(14) +#define IRQ_IOP32X_PERFMON IOP_IRQ(15) +#define IRQ_IOP32X_CORE_PMU IOP_IRQ(16) +#define IRQ_IOP32X_BIU_ERR IOP_IRQ(17) +#define IRQ_IOP32X_ATU_ERR IOP_IRQ(18) +#define IRQ_IOP32X_MCU_ERR IOP_IRQ(19) +#define IRQ_IOP32X_DMA0_ERR IOP_IRQ(20) +#define IRQ_IOP32X_DMA1_ERR IOP_IRQ(21) +#define IRQ_IOP32X_AA_ERR IOP_IRQ(23) +#define IRQ_IOP32X_MSG_ERR IOP_IRQ(24) +#define IRQ_IOP32X_SSP IOP_IRQ(25) +#define IRQ_IOP32X_XINT0 IOP_IRQ(27) +#define IRQ_IOP32X_XINT1 IOP_IRQ(28) +#define IRQ_IOP32X_XINT2 IOP_IRQ(29) +#define IRQ_IOP32X_XINT3 
IOP_IRQ(30) +#define IRQ_IOP32X_HPI IOP_IRQ(31) #endif diff --git a/arch/arm/mach-mmp/sram.c b/arch/arm/mach-mmp/sram.c index 6794e2db1ad5f..ecc46c31004f6 100644 --- a/arch/arm/mach-mmp/sram.c +++ b/arch/arm/mach-mmp/sram.c @@ -72,6 +72,8 @@ static int sram_probe(struct platform_device *pdev) if (!info) return -ENOMEM; + platform_set_drvdata(pdev, info); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { dev_err(&pdev->dev, "no memory resource defined\n"); @@ -107,8 +109,6 @@ static int sram_probe(struct platform_device *pdev) list_add(&info->node, &sram_bank_list); mutex_unlock(&sram_lock); - platform_set_drvdata(pdev, info); - dev_info(&pdev->dev, "initialized\n"); return 0; @@ -127,17 +127,19 @@ static int sram_remove(struct platform_device *pdev) struct sram_bank_info *info; info = platform_get_drvdata(pdev); - if (info == NULL) - return -ENODEV; - mutex_lock(&sram_lock); - list_del(&info->node); - mutex_unlock(&sram_lock); + if (info->sram_size) { + mutex_lock(&sram_lock); + list_del(&info->node); + mutex_unlock(&sram_lock); + + gen_pool_destroy(info->gpool); + iounmap(info->sram_virt); + kfree(info->pool_name); + } - gen_pool_destroy(info->gpool); - iounmap(info->sram_virt); - kfree(info->pool_name); kfree(info); + return 0; } diff --git a/arch/arm/mach-s3c/mach-jive.c b/arch/arm/mach-s3c/mach-jive.c index 285e1f0f4145a..0d7d408c37291 100644 --- a/arch/arm/mach-s3c/mach-jive.c +++ b/arch/arm/mach-s3c/mach-jive.c @@ -236,11 +236,11 @@ static int __init jive_mtdset(char *options) unsigned long set; if (options == NULL || options[0] == '\0') - return 0; + return 1; if (kstrtoul(options, 10, &set)) { printk(KERN_ERR "failed to parse mtdset=%s\n", options); - return 0; + return 1; } switch (set) { @@ -256,7 +256,7 @@ static int __init jive_mtdset(char *options) "using default.", set); } - return 0; + return 1; } /* parse the mtdset= option given to the kernel command line */ diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index ac964612d8b07..90933eabe1156 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -464,3 +464,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c842878f81331..baa0e9bbe7547 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -683,6 +683,7 @@ config ARM64_ERRATUM_2051678 config ARM64_ERRATUM_2077057 bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2" + default y help This option adds the workaround for ARM Cortex-A510 erratum 2077057. 
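
A note on the mach-iop32x changes above: Linux reserves IRQ 0 as "no interrupt", so the platform now offsets every hardware source by one (IOP_IRQ(x) = x + 1, NR_IRQS = 33, and the clz-based decode in entry-macro.S computing 32 - clz rather than 31 - clz), while the mask/unmask callbacks translate the Linux number back before touching the INTCTL shadow. A stand-alone sketch of the same offset pattern, with the mask register modelled as a plain variable instead of the real intctl_write():

    #include <stdint.h>
    #include <stdio.h>

    #define IOP_IRQ(x) ((x) + 1) /* hw source x -> Linux irq x + 1 */

    static uint32_t iop32x_mask; /* shadow of the INTCTL enable register */

    static void iop32x_irq_mask(unsigned int irq)
    {
        iop32x_mask &= ~(1u << (irq - 1)); /* back to the hw bit number */
    }

    static void iop32x_irq_unmask(unsigned int irq)
    {
        iop32x_mask |= 1u << (irq - 1);
    }

    int main(void)
    {
        unsigned int timer0 = IOP_IRQ(9); /* IRQ_IOP32X_TIMER0 */

        iop32x_irq_unmask(timer0);
        printf("after unmask: %#x\n", iop32x_mask); /* bit 9 set: 0x200 */
        iop32x_irq_mask(timer0);
        printf("after mask:   %#x\n", iop32x_mask); /* 0 again */
        return 0;
    }
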
Affected Cortex-A510 may corrupt SPSR_EL2 when the a step exception is diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi index 984c737fa627a..6e738f2a37013 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi @@ -273,9 +273,9 @@ #size-cells = <1>; ranges = <0x00 0x00 0xff800000 0x3000>; - timer: timer@400 { - compatible = "brcm,bcm6328-timer", "syscon"; - reg = <0x400 0x3c>; + twd: timer-mfd@400 { + compatible = "brcm,bcm4908-twd", "simple-mfd", "syscon"; + reg = <0x400 0x4c>; }; gpio0: gpio-controller@500 { @@ -330,7 +330,7 @@ reboot { compatible = "syscon-reboot"; - regmap = <&timer>; + regmap = <&twd>; offset = <0x34>; mask = <1>; }; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts index ec19fbf928a14..12a4b1c03390c 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts @@ -111,8 +111,8 @@ compatible = "silabs,si3226x"; reg = <0>; spi-max-frequency = <5000000>; - spi-cpha = <1>; - spi-cpol = <1>; + spi-cpha; + spi-cpol; pl022,hierarchy = <0>; pl022,interface = <0>; pl022,slave-tx-disable = <0>; @@ -135,8 +135,8 @@ at25,byte-len = <0x8000>; at25,addr-mode = <2>; at25,page-size = <64>; - spi-cpha = <1>; - spi-cpol = <1>; + spi-cpha; + spi-cpol; pl022,hierarchy = <0>; pl022,interface = <0>; pl022,slave-tx-disable = <0>; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index 2cfeaf3b0a876..8c218689fef70 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -687,7 +687,7 @@ }; }; - sata: ahci@663f2000 { + sata: sata@663f2000 { compatible = "brcm,iproc-ahci", "generic-ahci"; reg = <0x663f2000 0x1000>; dma-coherent; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index 01b01e3204118..35d1939e690b0 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -536,9 +536,9 @@ clock-names = "i2c"; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(1)>; - dmas = <&edma0 1 39>, - <&edma0 1 38>; - dma-names = "tx", "rx"; + dmas = <&edma0 1 38>, + <&edma0 1 39>; + dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index 687fea6d8afa4..4e7bd04d97984 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -499,9 +499,9 @@ interrupts = ; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(2)>; - dmas = <&edma0 1 39>, - <&edma0 1 38>; - dma-names = "tx", "rx"; + dmas = <&edma0 1 38>, + <&edma0 1 39>; + dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index 66ec5615651d4..5dea37651adfc 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -748,7 +748,7 @@ snps,hird-threshold = /bits/ 8 <0x0>; snps,dis_u2_susphy_quirk; snps,dis_u3_susphy_quirk; - snps,ref-clock-period-ns = <0x32>; + snps,ref-clock-period-ns = <0x29>; dr_mode = "host"; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-j5.dts b/arch/arm64/boot/dts/qcom/msm8916-samsung-j5.dts index 687bea438a571..6c408d61de75a 
100644 --- a/arch/arm64/boot/dts/qcom/msm8916-samsung-j5.dts +++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-j5.dts @@ -41,7 +41,7 @@ }; home-key { - lable = "Home Key"; + label = "Home Key"; gpios = <&msmgpio 109 GPIO_ACTIVE_LOW>; linux,code = ; }; diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 5a9a5ed0565f6..215f56daa26c2 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -713,6 +713,9 @@ #reset-cells = <1>; #power-domain-cells = <1>; reg = <0xfc400000 0x2000>; + + clock-names = "xo", "sleep_clk"; + clocks = <&xo_board>, <&sleep_clk>; }; rpm_msg_ram: sram@fc428000 { diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 937c2e0e93eb9..eab7a85050531 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -1790,7 +1790,7 @@ }; }; - gmu: gmu@3d69000 { + gmu: gmu@3d6a000 { compatible="qcom,adreno-gmu-635.0", "qcom,adreno-gmu"; reg = <0 0x03d6a000 0 0x34000>, <0 0x3de0000 0 0x10000>, diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index cfdeaa81f1bbc..1bb4d98db96fa 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -3613,10 +3613,10 @@ #clock-cells = <0>; clock-frequency = <9600000>; clock-output-names = "mclk"; - qcom,micbias1-millivolt = <1800>; - qcom,micbias2-millivolt = <1800>; - qcom,micbias3-millivolt = <1800>; - qcom,micbias4-millivolt = <1800>; + qcom,micbias1-microvolt = <1800000>; + qcom,micbias2-microvolt = <1800000>; + qcom,micbias3-microvolt = <1800000>; + qcom,micbias4-microvolt = <1800000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 6012322a59846..78265646feff7 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3556,9 +3556,9 @@ qcom,tcs-offset = <0xd00>; qcom,drv-id = <2>; qcom,tcs-config = , - , - , - ; + , + , + ; rpmhcc: clock-controller { compatible = "qcom,sm8150-rpmh-clk"; diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 5617a46e5ccdd..a92230bec1ddb 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -1740,8 +1740,8 @@ phys = <&pcie0_lane>; phy-names = "pciephy"; - perst-gpio = <&tlmm 79 GPIO_ACTIVE_LOW>; - enable-gpio = <&tlmm 81 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 79 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 81 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie0_default_state>; @@ -1801,7 +1801,7 @@ ranges = <0x01000000 0x0 0x40200000 0x0 0x40200000 0x0 0x100000>, <0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>; - interrupts = ; + interrupts = ; interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; @@ -1844,8 +1844,8 @@ phys = <&pcie1_lane>; phy-names = "pciephy"; - perst-gpio = <&tlmm 82 GPIO_ACTIVE_LOW>; - enable-gpio = <&tlmm 84 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 82 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 84 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie1_default_state>; @@ -1907,7 +1907,7 @@ ranges = <0x01000000 0x0 0x64200000 0x0 0x64200000 0x0 0x100000>, <0x02000000 0x0 0x64300000 0x0 0x64300000 0x0 0x3d00000>; - interrupts = ; + interrupts = ; interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; @@ -1950,8 +1950,8 @@ phys = <&pcie2_lane>; phy-names = "pciephy"; - 
perst-gpio = <&tlmm 85 GPIO_ACTIVE_LOW>; - enable-gpio = <&tlmm 87 GPIO_ACTIVE_HIGH>; + perst-gpios = <&tlmm 85 GPIO_ACTIVE_LOW>; + wake-gpios = <&tlmm 87 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie2_default_state>; diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 4b19744bcfb34..765d018e6306c 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -1820,7 +1820,7 @@ qcom,tcs-offset = <0xd00>; qcom,drv-id = <2>; qcom,tcs-config = , , - , ; + , ; rpmhcc: clock-controller { compatible = "qcom,sm8350-rpmh-clk"; diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 02b97e838c474..9ee055143f8a8 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -203,9 +203,9 @@ compatible = "arm,idle-state"; idle-state-name = "silver-rail-power-collapse"; arm,psci-suspend-param = <0x40000004>; - entry-latency-us = <274>; - exit-latency-us = <480>; - min-residency-us = <3934>; + entry-latency-us = <800>; + exit-latency-us = <750>; + min-residency-us = <4090>; local-timer-stop; }; @@ -213,9 +213,9 @@ compatible = "arm,idle-state"; idle-state-name = "gold-rail-power-collapse"; arm,psci-suspend-param = <0x40000004>; - entry-latency-us = <327>; - exit-latency-us = <1502>; - min-residency-us = <4488>; + entry-latency-us = <600>; + exit-latency-us = <1550>; + min-residency-us = <4791>; local-timer-stop; }; }; @@ -224,10 +224,10 @@ CLUSTER_SLEEP_0: cluster-sleep-0 { compatible = "domain-idle-state"; idle-state-name = "cluster-l3-off"; - arm,psci-suspend-param = <0x4100c344>; - entry-latency-us = <584>; - exit-latency-us = <2332>; - min-residency-us = <6118>; + arm,psci-suspend-param = <0x41000044>; + entry-latency-us = <1050>; + exit-latency-us = <2500>; + min-residency-us = <5309>; local-timer-stop; }; @@ -235,9 +235,9 @@ compatible = "domain-idle-state"; idle-state-name = "cluster-power-collapse"; arm,psci-suspend-param = <0x4100c344>; - entry-latency-us = <2893>; - exit-latency-us = <4023>; - min-residency-us = <9987>; + entry-latency-us = <2700>; + exit-latency-us = <3500>; + min-residency-us = <13959>; local-timer-stop; }; }; @@ -315,7 +315,7 @@ CLUSTER_PD: cpu-cluster0 { #power-domain-cells = <0>; - domain-idle-states = <&CLUSTER_SLEEP_0>; + domain-idle-states = <&CLUSTER_SLEEP_0>, <&CLUSTER_SLEEP_1>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts index c4dd2a6b48368..f81ce3240342c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts @@ -770,8 +770,8 @@ sd-uhs-sdr104; /* Power supply */ - vqmmc-supply = &vcc1v8_s3; /* IO line */ - vmmc-supply = &vcc_sdio; /* card's power */ + vqmmc-supply = <&vcc1v8_s3>; /* IO line */ + vmmc-supply = <&vcc_sdio>; /* card's power */ #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi index 012011dc619a5..ce4daff758e7e 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi @@ -59,7 +59,10 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */ - <0x00 0x01840000 0x00 0xC0000>; /* GICR */ + <0x00 0x01840000 0x00 0xC0000>, /* GICR */ + <0x01 0x00000000 0x00 0x2000>, /* GICC */ + <0x01 0x00010000 0x00 0x1000>, /* GICH */ + <0x01 0x00020000 0x00 0x2000>; /* GICV */ /* * vcpumntirq: * virtual CPU 
interface maintenance interrupt diff --git a/arch/arm64/boot/dts/ti/k3-am64.dtsi b/arch/arm64/boot/dts/ti/k3-am64.dtsi index 120974726be81..19684865d0d68 100644 --- a/arch/arm64/boot/dts/ti/k3-am64.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64.dtsi @@ -87,6 +87,7 @@ <0x00 0x68000000 0x00 0x68000000 0x00 0x08000000>, /* PCIe DAT0 */ <0x00 0x70000000 0x00 0x70000000 0x00 0x00200000>, /* OC SRAM */ <0x00 0x78000000 0x00 0x78000000 0x00 0x00800000>, /* Main R5FSS */ + <0x01 0x00000000 0x01 0x00000000 0x00 0x00310000>, /* A53 PERIPHBASE */ <0x06 0x00000000 0x06 0x00000000 0x01 0x00000000>, /* PCIe DAT1 */ <0x05 0x00000000 0x05 0x00000000 0x01 0x00000000>, /* FSS0 DAT3 */ diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index ce8bb4a61011e..e749343accedd 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -35,7 +35,10 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */ - <0x00 0x01880000 0x00 0x90000>; /* GICR */ + <0x00 0x01880000 0x00 0x90000>, /* GICR */ + <0x00 0x6f000000 0x00 0x2000>, /* GICC */ + <0x00 0x6f010000 0x00 0x1000>, /* GICH */ + <0x00 0x6f020000 0x00 0x2000>; /* GICV */ /* * vcpumntirq: * virtual CPU interface maintenance interrupt diff --git a/arch/arm64/boot/dts/ti/k3-am65.dtsi b/arch/arm64/boot/dts/ti/k3-am65.dtsi index a58a39fa42dbc..c538a0bf3cdda 100644 --- a/arch/arm64/boot/dts/ti/k3-am65.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65.dtsi @@ -86,6 +86,7 @@ <0x00 0x46000000 0x00 0x46000000 0x00 0x00200000>, <0x00 0x47000000 0x00 0x47000000 0x00 0x00068400>, <0x00 0x50000000 0x00 0x50000000 0x00 0x8000000>, + <0x00 0x6f000000 0x00 0x6f000000 0x00 0x00310000>, /* A53 PERIPHBASE */ <0x00 0x70000000 0x00 0x70000000 0x00 0x200000>, <0x05 0x00000000 0x05 0x00000000 0x01 0x0000000>, <0x07 0x00000000 0x07 0x00000000 0x01 0x0000000>; diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi index 05a627ad6cdc4..16684a2f054d9 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi @@ -54,7 +54,10 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */ - <0x00 0x01900000 0x00 0x100000>; /* GICR */ + <0x00 0x01900000 0x00 0x100000>, /* GICR */ + <0x00 0x6f000000 0x00 0x2000>, /* GICC */ + <0x00 0x6f010000 0x00 0x1000>, /* GICH */ + <0x00 0x6f020000 0x00 0x2000>; /* GICV */ /* vcpumntirq: virtual CPU interface maintenance interrupt */ interrupts = ; diff --git a/arch/arm64/boot/dts/ti/k3-j7200.dtsi b/arch/arm64/boot/dts/ti/k3-j7200.dtsi index 64fef4e67d76a..b6da0454cc5bd 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200.dtsi @@ -129,6 +129,7 @@ <0x00 0x00a40000 0x00 0x00a40000 0x00 0x00000800>, /* timesync router */ <0x00 0x01000000 0x00 0x01000000 0x00 0x0d000000>, /* Most peripherals */ <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>, /* MAIN NAVSS */ + <0x00 0x6f000000 0x00 0x6f000000 0x00 0x00310000>, /* A72 PERIPHBASE */ <0x00 0x70000000 0x00 0x70000000 0x00 0x00800000>, /* MSMC RAM */ <0x00 0x18000000 0x00 0x18000000 0x00 0x08000000>, /* PCIe1 DAT0 */ <0x41 0x00000000 0x41 0x00000000 0x01 0x00000000>, /* PCIe1 DAT1 */ diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index 599861259a30f..db0669985e42a 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -76,7 +76,10 @@ 
#interrupt-cells = <3>; interrupt-controller; reg = <0x00 0x01800000 0x00 0x10000>, /* GICD */ - <0x00 0x01900000 0x00 0x100000>; /* GICR */ + <0x00 0x01900000 0x00 0x100000>, /* GICR */ + <0x00 0x6f000000 0x00 0x2000>, /* GICC */ + <0x00 0x6f010000 0x00 0x1000>, /* GICH */ + <0x00 0x6f020000 0x00 0x2000>; /* GICV */ /* vcpumntirq: virtual CPU interface maintenance interrupt */ interrupts = ; diff --git a/arch/arm64/boot/dts/ti/k3-j721e.dtsi b/arch/arm64/boot/dts/ti/k3-j721e.dtsi index 4a3872fce5339..0e23886c9fd1d 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e.dtsi @@ -139,6 +139,7 @@ <0x00 0x0e000000 0x00 0x0e000000 0x00 0x01800000>, /* PCIe Core*/ <0x00 0x10000000 0x00 0x10000000 0x00 0x10000000>, /* PCIe DAT */ <0x00 0x64800000 0x00 0x64800000 0x00 0x00800000>, /* C71 */ + <0x00 0x6f000000 0x00 0x6f000000 0x00 0x00310000>, /* A72 PERIPHBASE */ <0x44 0x00000000 0x44 0x00000000 0x00 0x08000000>, /* PCIe2 DAT */ <0x44 0x10000000 0x44 0x10000000 0x00 0x08000000>, /* PCIe3 DAT */ <0x4d 0x80800000 0x4d 0x80800000 0x00 0x00800000>, /* C66_0 */ diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi index b04db1d3ab617..be7f39299894e 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721s2-main.dtsi @@ -34,7 +34,10 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x00 0x01800000 0x00 0x200000>, /* GICD */ - <0x00 0x01900000 0x00 0x100000>; /* GICR */ + <0x00 0x01900000 0x00 0x100000>, /* GICR */ + <0x00 0x6f000000 0x00 0x2000>, /* GICC */ + <0x00 0x6f010000 0x00 0x1000>, /* GICH */ + <0x00 0x6f020000 0x00 0x2000>; /* GICV */ /* vcpumntirq: virtual CPU interface maintenance interrupt */ interrupts = ; diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi index 7521963719ff9..6c5c02edb375d 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721s2-mcu-wakeup.dtsi @@ -108,7 +108,7 @@ reg = <0x00 0x42110000 0x00 0x100>; gpio-controller; #gpio-cells = <2>; - interrupt-parent = <&main_gpio_intr>; + interrupt-parent = <&wkup_gpio_intr>; interrupts = <103>, <104>, <105>, <106>, <107>, <108>; interrupt-controller; #interrupt-cells = <2>; @@ -124,7 +124,7 @@ reg = <0x00 0x42100000 0x00 0x100>; gpio-controller; #gpio-cells = <2>; - interrupt-parent = <&main_gpio_intr>; + interrupt-parent = <&wkup_gpio_intr>; interrupts = <112>, <113>, <114>, <115>, <116>, <117>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi index fe5234c40f6ce..7b930a85a29d6 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi @@ -119,6 +119,7 @@ <0x00 0x18000000 0x00 0x18000000 0x00 0x08000000>, /* PCIe1 DAT0 */ <0x00 0x64800000 0x00 0x64800000 0x00 0x0070c000>, /* C71_1 */ <0x00 0x65800000 0x00 0x65800000 0x00 0x0070c000>, /* C71_2 */ + <0x00 0x6f000000 0x00 0x6f000000 0x00 0x00310000>, /* A72 PERIPHBASE */ <0x00 0x70000000 0x00 0x70000000 0x00 0x00400000>, /* MSMC RAM */ <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>, /* MAIN NAVSS */ <0x41 0x00000000 0x41 0x00000000 0x01 0x00000000>, /* PCIe1 DAT1 */ diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 30516dc0b70ec..7411e4f9b5545 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -939,7 +939,7 @@ CONFIG_DMADEVICES=y CONFIG_DMA_BCM2835=y CONFIG_DMA_SUN6I=m 
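
The TI K3 hunks above (k3-am64, k3-am65, k3-j7200, k3-j721e, k3-j721s2) add the GIC's legacy GICC/GICH/GICV CPU-interface windows at the A53/A72 PERIPHBASE and widen the SoC ranges so the child bus can actually translate them; a reg window that falls outside every ranges entry of its parent cannot be mapped. A quick containment check of the new am64 entries, with addresses copied from the hunks (the helper names are invented for this sketch):

    #include <stdint.h>
    #include <stdio.h>

    struct win { uint64_t base, size; };

    static int contains(struct win outer, struct win inner)
    {
        return inner.base >= outer.base &&
               inner.base + inner.size <= outer.base + outer.size;
    }

    int main(void)
    {
        /* A53 PERIPHBASE window added to the k3-am64 ranges. */
        struct win periphbase = { 0x100000000ULL, 0x310000 };
        /* GICC/GICH/GICV reg entries added in the same patch. */
        struct win gic[] = {
            { 0x100000000ULL, 0x2000 }, /* GICC */
            { 0x100010000ULL, 0x1000 }, /* GICH */
            { 0x100020000ULL, 0x2000 }, /* GICV */
        };
        for (int i = 0; i < 3; i++)
            printf("entry %d inside ranges: %s\n", i,
                   contains(periphbase, gic[i]) ? "yes" : "no");
        return 0;
    }
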
CONFIG_FSL_EDMA=y -CONFIG_IMX_SDMA=y +CONFIG_IMX_SDMA=m CONFIG_K3_DMA=y CONFIG_MV_XOR=y CONFIG_MV_XOR_V2=y diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index a11ccadd47d29..094701ec5500b 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,8 +1,8 @@ SECTIONS { #ifdef CONFIG_ARM64_MODULE_PLTS - .plt 0 (NOLOAD) : { BYTE(0) } - .init.plt 0 (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } + .plt 0 : { BYTE(0) } + .init.plt 0 : { BYTE(0) } + .text.ftrace_trampoline 0 : { BYTE(0) } #endif #ifdef CONFIG_KASAN_SW_TAGS diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 94e147e5456ca..85d509a08ce36 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -999,23 +999,13 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * page after fork() + CoW for pfn mappings. We don't always have a * hardware-managed access flag on arm64. */ -static inline bool arch_faults_on_old_pte(void) -{ - WARN_ON(preemptible()); - - return !cpu_has_hw_af(); -} -#define arch_faults_on_old_pte arch_faults_on_old_pte +#define arch_has_hw_pte_young cpu_has_hw_af /* * Experimentally, it's cheap to set the access flag in hardware and we * benefit from prefaulting mappings as 'old' to start with. */ -static inline bool arch_wants_old_prefaulted_pte(void) -{ - return !arch_faults_on_old_pte(); -} -#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte +#define arch_wants_old_prefaulted_pte cpu_has_hw_af static inline bool pud_sect_supported(void) { diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 86e0cc9b9c685..aa3d3607d5c8d 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -67,7 +67,8 @@ struct bp_hardening_data { DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); -static inline void arm64_apply_bp_hardening(void) +/* Called during entry so must be __always_inline */ +static __always_inline void arm64_apply_bp_hardening(void) { struct bp_hardening_data *d; diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 4e65da3445c7a..14c95844f6c84 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 451 +#define __NR_compat_syscalls 452 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 604a2053d0067..91f2bb7199af9 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -907,6 +907,8 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_pmadv_ksm 451 +__SYSCALL(__NR_pmadv_ksm, sys_pmadv_ksm) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 6d45c63c64548..5777929d35bf4 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -233,17 +233,20 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn) __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT); } -static void 
call_smc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_smc_arch_workaround_1(void) { arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void call_hvc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_hvc_arch_workaround_1(void) { arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void qcom_link_stack_sanitisation(void) +/* Called during entry so must be noinstr */ +static noinstr void qcom_link_stack_sanitisation(void) { u64 tmp; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index d8aaf4b6f4320..3d66fba69016f 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -577,10 +577,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, { int err; - err = sigframe_alloc(user, &user->fpsimd_offset, - sizeof(struct fpsimd_context)); - if (err) - return err; + if (system_supports_fpsimd()) { + err = sigframe_alloc(user, &user->fpsimd_offset, + sizeof(struct fpsimd_context)); + if (err) + return err; + } /* fault information, if valid */ if (add_all || current->thread.fault_code) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index db63cc885771a..9e26ec80d3175 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr); * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4). * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory, * otherwise it is empty. + * + * Memory reservation for crash kernel either done early or deferred + * depending on DMA memory zones configs (ZONE_DMA) -- + * + * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized + * here instead of max_zone_phys(). This lets early reservation of + * crash kernel memory which has a dependency on arm64_dma_phys_limit. + * Reserving memory early for crash kernel allows linear creation of block + * mappings (greater than page-granularity) for all the memory bank rangs. + * In this scheme a comparatively quicker boot is observed. + * + * If ZONE_DMA configs are defined, crash kernel memory reservation + * is delayed until DMA zone memory range size initilazation performed in + * zone_sizes_init(). The defer is necessary to steer clear of DMA zone + * memory range to avoid overlap allocation. So crash kernel memory boundaries + * are not known when mapping all bank memory ranges, which otherwise means + * not possible to exclude crash kernel range from creating block mappings + * so page-granularity mappings are created for the entire memory range. + * Hence a slightly slower boot is observed. + * + * Note: Page-granularity mapppings are necessary for crash kernel memory + * range for shrinking its size via /sys/kernel/kexec_crash_size interface. 
*/ -phys_addr_t arm64_dma_phys_limit __ro_after_init; +#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32) +phys_addr_t __ro_after_init arm64_dma_phys_limit; +#else +phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1; +#endif #ifdef CONFIG_KEXEC_CORE /* @@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (!arm64_dma_phys_limit) arm64_dma_phys_limit = dma32_phys_limit; #endif - if (!arm64_dma_phys_limit) - arm64_dma_phys_limit = PHYS_MASK + 1; max_zone_pfns[ZONE_NORMAL] = max; free_area_init(max_zone_pfns); @@ -315,6 +339,9 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); + if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) + reserve_crashkernel(); + high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } @@ -361,7 +388,8 @@ void __init bootmem_init(void) * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. */ - reserve_crashkernel(); + if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)) + reserve_crashkernel(); memblock_dump_all(); } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 49abbf43bf355..37b8230cda6a8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -63,6 +63,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; static DEFINE_SPINLOCK(swapper_pgdir_lock); +static DEFINE_MUTEX(fixmap_lock); void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) { @@ -329,6 +330,12 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } BUG_ON(p4d_bad(p4d)); + /* + * No need for locking during early boot. And it doesn't work as + * expected with KASLR enabled. + */ + if (system_state != SYSTEM_BOOTING) + mutex_lock(&fixmap_lock); pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -359,6 +366,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } while (pudp++, addr = next, addr != end); pud_clear_fixmap(); + if (system_state != SYSTEM_BOOTING) + mutex_unlock(&fixmap_lock); } static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, @@ -517,7 +526,7 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE)) + if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* @@ -528,6 +537,17 @@ static void __init map_mem(pgd_t *pgdp) */ memblock_mark_nomap(kernel_start, kernel_end - kernel_start); +#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map) { + if (IS_ENABLED(CONFIG_ZONE_DMA) || + IS_ENABLED(CONFIG_ZONE_DMA32)) + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + else if (crashk_res.end) + memblock_mark_nomap(crashk_res.start, + resource_size(&crashk_res)); + } +#endif + /* map all the memory banks */ for_each_mem_range(i, &start, &end) { if (start >= end) @@ -554,6 +574,25 @@ static void __init map_mem(pgd_t *pgdp) __map_memblock(pgdp, kernel_start, kernel_end, PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); + + /* + * Use page-level mappings here so that we can shrink the region + * in page granularity and put back unused memory to buddy system + * through /sys/kernel/kexec_crash_size interface. 
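
To restate the arm64 init.c logic above in runnable form: when no DMA zone is configured, the DMA limit is known statically (PHYS_MASK + 1), so the crash kernel can be reserved early from arm64_memblock_init() and the linear map keeps its block mappings; with ZONE_DMA or ZONE_DMA32 the reservation must wait until the zone limits are computed, and the mmu.c hunk nearby compensates by falling back to page-granularity mappings. A minimal sketch of that ordering, with stand-in functions rather than the kernel's signatures:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for CONFIG_ZONE_DMA / CONFIG_ZONE_DMA32. */
    static const bool have_dma_zones = false;

    static void reserve_crashkernel(void)
    {
        puts("crashkernel reserved");
    }

    static void arm64_memblock_init(void)
    {
        if (!have_dma_zones)
            reserve_crashkernel(); /* early: limit known, block mappings OK */
    }

    static void bootmem_init(void)
    {
        if (have_dma_zones)
            reserve_crashkernel(); /* late: after DMA zone limits exist */
    }

    int main(void)
    {
        arm64_memblock_init();
        bootmem_init(); /* exactly one of the two calls fires */
        return 0;
    }
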
+ */ +#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map && + !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) { + if (crashk_res.end) { + __map_memblock(pgdp, crashk_res.start, + crashk_res.end + 1, + PAGE_KERNEL, + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_res.start, + resource_size(&crashk_res)); + } + } +#endif } void mark_rodata_ro(void) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index e96d4d87291f3..cbc41e261f1e7 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1049,15 +1049,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; } - /* 1. Initial fake pass to compute ctx->idx. */ - - /* Fake pass to fill in ctx->offset. */ - if (build_body(&ctx, extra_pass)) { + /* + * 1. Initial fake pass to compute ctx->idx and ctx->offset. + * + * BPF line info needs ctx->offset[i] to be the offset of + * instruction[i] in jited image, so build prologue first. + */ + if (build_prologue(&ctx, was_classic)) { prog = orig_prog; goto out_off; } - if (build_prologue(&ctx, was_classic)) { + if (build_body(&ctx, extra_pass)) { prog = orig_prog; goto out_off; } @@ -1130,6 +1133,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog->jited_len = prog_size; if (!prog->is_func || extra_pass) { + int i; + + /* offset[prog->len] is the size of program */ + for (i = 0; i <= prog->len; i++) + ctx.offset[i] *= AARCH64_INSN_SIZE; bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); out_off: kfree(ctx.offset); diff --git a/arch/csky/include/asm/uaccess.h b/arch/csky/include/asm/uaccess.h index c40f06ee8d3ef..ac5a54f57d407 100644 --- a/arch/csky/include/asm/uaccess.h +++ b/arch/csky/include/asm/uaccess.h @@ -3,14 +3,13 @@ #ifndef __ASM_CSKY_UACCESS_H #define __ASM_CSKY_UACCESS_H -#define user_addr_max() \ - (uaccess_kernel() ? 
KERNEL_DS.seg : get_fs().seg) +#define user_addr_max() (current_thread_info()->addr_limit.seg) static inline int __access_ok(unsigned long addr, unsigned long size) { - unsigned long limit = current_thread_info()->addr_limit.seg; + unsigned long limit = user_addr_max(); - return ((addr < limit) && ((addr + size) < limit)); + return (size <= limit) && (addr <= (limit - size)); } #define __access_ok __access_ok diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c index 92057de08f4f0..1612f43540877 100644 --- a/arch/csky/kernel/perf_callchain.c +++ b/arch/csky/kernel/perf_callchain.c @@ -49,7 +49,7 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, { struct stackframe buftail; unsigned long lr = 0; - unsigned long *user_frame_tail = (unsigned long *)fp; + unsigned long __user *user_frame_tail = (unsigned long __user *)fp; /* Check accessibility of one struct frame_tail beyond */ if (!access_ok(user_frame_tail, sizeof(buftail))) diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index c7b763d2f526e..8867ddf3e6c77 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -136,7 +136,7 @@ static inline void __user *get_sigframe(struct ksignal *ksig, static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - struct rt_sigframe *frame; + struct rt_sigframe __user *frame; int err = 0; frame = get_sigframe(ksig, regs, sizeof(*frame)); diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h index ef5bfef8d490c..719ba3f3c45cd 100644 --- a/arch/hexagon/include/asm/uaccess.h +++ b/arch/hexagon/include/asm/uaccess.h @@ -25,17 +25,17 @@ * Returns true (nonzero) if the memory block *may* be valid, false (zero) * if it is definitely invalid. * - * User address space in Hexagon, like x86, goes to 0xbfffffff, so the - * simple MSB-based tests used by MIPS won't work. Some further - * optimization is probably possible here, but for now, keep it - * reasonably simple and not *too* slow. After all, we've got the - * MMU for backup. */ +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) +#define user_addr_max() (uaccess_kernel() ? 
~0UL : TASK_SIZE) -#define __access_ok(addr, size) \ - ((get_fs().seg == KERNEL_DS.seg) || \ - (((unsigned long)addr < get_fs().seg) && \ - (unsigned long)size < (get_fs().seg - (unsigned long)addr))) +static inline int __access_ok(unsigned long addr, unsigned long size) +{ + unsigned long limit = TASK_SIZE; + + return (size <= limit) && (addr <= (limit - size)); +} +#define __access_ok __access_ok /* * When a kernel-mode page fault is taken, the faulting instruction diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 78b1d03e86e1d..79ad5a5682b30 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -371,3 +371,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c index 0386252e9d043..4218750414bbf 100644 --- a/arch/m68k/coldfire/device.c +++ b/arch/m68k/coldfire/device.c @@ -480,7 +480,7 @@ static struct platform_device mcf_i2c5 = { #endif /* MCFI2C_BASE5 */ #endif /* IS_ENABLED(CONFIG_I2C_IMX) */ -#if IS_ENABLED(CONFIG_MCF_EDMA) +#ifdef MCFEDMA_BASE static const struct dma_slave_map mcf_edma_map[] = { { "dreq0", "rx-tx", MCF_EDMA_FILTER_PARAM(0) }, @@ -552,7 +552,7 @@ static struct platform_device mcf_edma = { .platform_data = &mcf_edma_data, } }; -#endif /* IS_ENABLED(CONFIG_MCF_EDMA) */ +#endif /* MCFEDMA_BASE */ #ifdef MCFSDHC_BASE static struct mcf_esdhc_platform_data mcf_esdhc_data = { @@ -651,7 +651,7 @@ static struct platform_device *mcf_devices[] __initdata = { &mcf_i2c5, #endif #endif -#if IS_ENABLED(CONFIG_MCF_EDMA) +#ifdef MCFEDMA_BASE &mcf_edma, #endif #ifdef MCFSDHC_BASE diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h index ba670523885c8..60b786eb2254e 100644 --- a/arch/m68k/include/asm/uaccess.h +++ b/arch/m68k/include/asm/uaccess.h @@ -12,14 +12,17 @@ #include /* We let the MMU do all checking */ -static inline int access_ok(const void __user *addr, +static inline int access_ok(const void __user *ptr, unsigned long size) { - /* - * XXX: for !CONFIG_CPU_HAS_ADDRESS_SPACES this really needs to check - * for TASK_SIZE! 
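
The csky, hexagon, m68k and microblaze __access_ok conversions in this region all land on the same overflow-safe form: never compute addr + size, which can wrap for hostile values; compare size against the limit first and then addr against limit - size. A self-contained demonstration (the limit value is an arbitrary example, not any architecture's real TASK_SIZE):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Overflow-safe range check in the style of the hunks here. */
    static bool range_ok(uint32_t addr, uint32_t size, uint32_t limit)
    {
        return size <= limit && addr <= limit - size;
    }

    /* The naive form the patches remove: addr + size can wrap. */
    static bool range_naive(uint32_t addr, uint32_t size, uint32_t limit)
    {
        return addr < limit && addr + size < limit;
    }

    int main(void)
    {
        uint32_t limit = 0x80000000u; /* example user/kernel split */

        /* Wrapping case: the naive check wrongly accepts it. */
        printf("naive: %d  safe: %d\n",
               range_naive(0x7fffff00u, 0xffffff00u, limit),
               range_ok(0x7fffff00u, 0xffffff00u, limit));
        return 0;
    }

Here addr + size wraps to 0x7fffff00 + 0xffffff00 = 0x7ffffe00 modulo 2^32, below the limit, so the naive form returns true while the safe form rejects the range because size alone exceeds the limit.
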
- */ - return 1; + unsigned long limit = TASK_SIZE; + unsigned long addr = (unsigned long)ptr; + + if (IS_ENABLED(CONFIG_CPU_HAS_ADDRESS_SPACES) || + !IS_ENABLED(CONFIG_MMU)) + return 1; + + return (size <= limit) && (addr <= (limit - size)); } /* diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index b1f3940bc2981..5ccf925567da0 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -450,3 +450,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index d2a8ef9f89787..3fe96979d2c62 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -39,24 +39,13 @@ # define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) -static inline int access_ok(const void __user *addr, unsigned long size) +static inline int __access_ok(unsigned long addr, unsigned long size) { - if (!size) - goto ok; + unsigned long limit = user_addr_max(); - if ((get_fs().seg < ((unsigned long)addr)) || - (get_fs().seg < ((unsigned long)addr + size - 1))) { - pr_devel("ACCESS fail at 0x%08x (size 0x%x), seg 0x%08x\n", - (__force u32)addr, (u32)size, - (u32)get_fs().seg); - return 0; - } -ok: - pr_devel("ACCESS OK at 0x%08x (size 0x%x), seg 0x%08x\n", - (__force u32)addr, (u32)size, - (u32)get_fs().seg); - return 1; + return (size <= limit) && (addr <= (limit - size)); } +#define access_ok(addr, size) __access_ok((unsigned long)addr, size) # define __FIXUP_SECTION ".section .fixup,\"ax\"\n" # define __EX_TABLE_SECTION ".section __ex_table,\"a\"\n" @@ -141,27 +130,27 @@ extern long __user_bad(void); #define __get_user(x, ptr) \ ({ \ - unsigned long __gu_val = 0; \ long __gu_err; \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm("lbu", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lbu", (ptr), x, __gu_err); \ break; \ case 2: \ - __get_user_asm("lhu", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lhu", (ptr), x, __gu_err); \ break; \ case 4: \ - __get_user_asm("lw", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lw", (ptr), x, __gu_err); \ break; \ - case 8: \ - __gu_err = __copy_from_user(&__gu_val, ptr, 8); \ - if (__gu_err) \ - __gu_err = -EFAULT; \ + case 8: { \ + __u64 __x = 0; \ + __gu_err = raw_copy_from_user(&__x, ptr, 8) ? 
\ + -EFAULT : 0; \ + (x) = (typeof(x))(typeof((x) - (x)))__x; \ break; \ + } \ default: \ /* __gu_val = 0; __gu_err = -EINVAL;*/ __gu_err = __user_bad();\ } \ - x = (__force __typeof__(*(ptr))) __gu_val; \ __gu_err; \ }) diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 820145e473501..6b76208597f3c 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -456,3 +456,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 058446f01487c..651d4fe355da6 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -101,6 +101,7 @@ config MIPS select TRACE_IRQFLAGS_SUPPORT select VIRT_TO_BUS select ARCH_HAS_ELFCORE_COMPAT + select HAVE_ARCH_KCSAN if 64BIT config MIPS_FIXUP_BIGPHYS_ADDR bool diff --git a/arch/mips/Makefile b/arch/mips/Makefile index e036fc025cccb..4478c5661d61d 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -340,14 +340,12 @@ drivers-$(CONFIG_PM) += arch/mips/power/ boot-y := vmlinux.bin boot-y += vmlinux.ecoff boot-y += vmlinux.srec -ifeq ($(shell expr $(load-y) \< 0xffffffff80000000 2> /dev/null), 0) boot-y += uImage boot-y += uImage.bin boot-y += uImage.bz2 boot-y += uImage.gz boot-y += uImage.lzma boot-y += uImage.lzo -endif boot-y += vmlinux.itb boot-y += vmlinux.gz.itb boot-y += vmlinux.bz2.itb @@ -359,9 +357,7 @@ bootz-y := vmlinuz bootz-y += vmlinuz.bin bootz-y += vmlinuz.ecoff bootz-y += vmlinuz.srec -ifeq ($(shell expr $(zload-y) \< 0xffffffff80000000 2> /dev/null), 0) bootz-y += uzImage.bin -endif bootz-y += vmlinuz.itb # diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 5a15d51e88841..6cc28173bee89 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -38,6 +38,7 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n +KCSAN_SANITIZE := n # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o $(obj)/bswapsi.o diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c index 0a03529cf3178..3e4f5ba104f89 100644 --- a/arch/mips/crypto/crc32-mips.c +++ b/arch/mips/crypto/crc32-mips.c @@ -28,7 +28,7 @@ enum crc_type { }; #ifndef TOOLCHAIN_SUPPORTS_CRC -#define _ASM_MACRO_CRC32(OP, SZ, TYPE) \ +#define _ASM_SET_CRC(OP, SZ, TYPE) \ _ASM_MACRO_3R(OP, rt, rs, rt2, \ ".ifnc \\rt, \\rt2\n\t" \ ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \ @@ -37,30 +37,36 @@ _ASM_MACRO_3R(OP, rt, rs, rt2, \ ((SZ) << 6) | ((TYPE) << 8)) \ _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \ ((SZ) << 14) | ((TYPE) << 3))) -_ASM_MACRO_CRC32(crc32b, 0, 0); -_ASM_MACRO_CRC32(crc32h, 1, 0); -_ASM_MACRO_CRC32(crc32w, 2, 0); -_ASM_MACRO_CRC32(crc32d, 3, 0); -_ASM_MACRO_CRC32(crc32cb, 0, 1); -_ASM_MACRO_CRC32(crc32ch, 1, 1); -_ASM_MACRO_CRC32(crc32cw, 2, 1); -_ASM_MACRO_CRC32(crc32cd, 3, 1); -#define _ASM_SET_CRC "" +#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t" #else /* !TOOLCHAIN_SUPPORTS_CRC */ -#define _ASM_SET_CRC ".set\tcrc\n\t" +#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t" +#define _ASM_UNSET_CRC(op, SZ, TYPE) #endif -#define _CRC32(crc, value, size, type) \ -do { \ - __asm__ __volatile__( \ - ".set push\n\t" \ - _ASM_SET_CRC \ - 
#type #size " %0, %1, %0\n\t" \ - ".set pop" \ - : "+r" (crc) \ - : "r" (value)); \ +#define __CRC32(crc, value, op, SZ, TYPE) \ +do { \ + __asm__ __volatile__( \ + ".set push\n\t" \ + _ASM_SET_CRC(op, SZ, TYPE) \ + #op " %0, %1, %0\n\t" \ + _ASM_UNSET_CRC(op, SZ, TYPE) \ + ".set pop" \ + : "+r" (crc) \ + : "r" (value)); \ } while (0) +#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0) +#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0) +#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0) +#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0) +#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1) +#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1) +#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1) +#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1) + +#define _CRC32(crc, value, size, op) \ + _CRC32_##op##size(crc, value) + #define CRC32(crc, value, size) \ _CRC32(crc, value, size, crc32) diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index ea5b5a83f1e11..011d1d678840a 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -131,7 +131,7 @@ */ mfc0 t0,CP0_CAUSE # get pending interrupts mfc0 t1,CP0_STATUS -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) lw t2,cpu_fpu_mask #endif andi t0,ST0_IM # CAUSE.CE may be non-zero! @@ -139,7 +139,7 @@ beqz t0,spurious -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) and t2,t0 bnez t2,fpu # handle FPU immediately #endif @@ -280,7 +280,7 @@ handle_it: j dec_irq_dispatch nop -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) fpu: lw t0,fpu_kstat_irq nop diff --git a/arch/mips/dec/prom/Makefile b/arch/mips/dec/prom/Makefile index d95016016b42b..2bad87551203b 100644 --- a/arch/mips/dec/prom/Makefile +++ b/arch/mips/dec/prom/Makefile @@ -6,4 +6,4 @@ lib-y += init.o memory.o cmdline.o identify.o console.o -lib-$(CONFIG_32BIT) += locore.o +lib-$(CONFIG_CPU_R3000) += locore.o diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index a8a30bb1dee8c..82b00e45ce50a 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -746,7 +746,8 @@ void __init arch_init_irq(void) dec_interrupt[DEC_IRQ_HALT] = -1; /* Register board interrupts: FPU and cascade. */ - if (dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { + if (IS_ENABLED(CONFIG_MIPS_FP_SUPPORT) && + dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { struct irq_desc *desc_fpu; int irq_fpu; diff --git a/arch/mips/include/asm/dec/prom.h b/arch/mips/include/asm/dec/prom.h index 62c7dfb90e06c..1e1247add1cf8 100644 --- a/arch/mips/include/asm/dec/prom.h +++ b/arch/mips/include/asm/dec/prom.h @@ -43,16 +43,11 @@ */ #define REX_PROM_MAGIC 0x30464354 -#ifdef CONFIG_64BIT - -#define prom_is_rex(magic) 1 /* KN04 and KN05 are REX PROMs. */ - -#else /* !CONFIG_64BIT */ - -#define prom_is_rex(magic) ((magic) == REX_PROM_MAGIC) - -#endif /* !CONFIG_64BIT */ - +/* KN04 and KN05 are REX PROMs, so only do the check for R3k systems. 
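
The prom_is_rex() change just below is the usual #ifdef-to-IS_ENABLED() conversion: both branches stay visible to the compiler, and on non-R3k configurations the magic comparison constant-folds away. A userspace-compilable sketch of the same shape (the IS_ENABLED stub stands in for the real <linux/kconfig.h> macro, and the config value is an assumed example):

    #include <stdbool.h>
    #include <stdio.h>

    #define REX_PROM_MAGIC 0x30464354

    /* Stand-in: the kernel derives this from CONFIG_CPU_R3000. */
    #define IS_ENABLED(option) (option)
    #define CONFIG_CPU_R3000 0 /* assumed: a 64-bit (R4k-class) build */

    static bool prom_is_rex(unsigned int magic)
    {
        /* KN04/KN05 always have REX PROMs; only R3k systems must check. */
        return !IS_ENABLED(CONFIG_CPU_R3000) || magic == REX_PROM_MAGIC;
    }

    int main(void)
    {
        printf("%d %d\n", prom_is_rex(REX_PROM_MAGIC), prom_is_rex(0));
        return 0;
    }
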
*/ +static inline bool prom_is_rex(u32 magic) +{ + return !IS_ENABLED(CONFIG_CPU_R3000) || magic == REX_PROM_MAGIC; +} /* * 3MIN/MAXINE PROM entry points for DS5000/1xx's, DS5000/xx's and diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index c7925d0e98746..867e9c3db76e9 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -15,6 +15,7 @@ #define __HAVE_ARCH_PMD_ALLOC_ONE #define __HAVE_ARCH_PUD_ALLOC_ONE +#define __HAVE_ARCH_PGD_FREE #include static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, @@ -48,6 +49,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern void pgd_init(unsigned long page); extern pgd_t *pgd_alloc(struct mm_struct *mm); +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + free_pages((unsigned long)pgd, PGD_ORDER); +} + #define __pte_free_tlb(tlb,pte,address) \ do { \ pgtable_pte_page_dtor(pte); \ diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 253ff994ed2ec..e4aeedb17c383 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -389,3 +389,4 @@ 448 n32 process_mrelease sys_process_mrelease 449 n32 futex_waitv sys_futex_waitv 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 n32 pmadv_ksm sys_pmadv_ksm diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 3f1886ad9d806..fe88db51efa00 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -365,3 +365,4 @@ 448 n64 process_mrelease sys_process_mrelease 449 n64 futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 n64 pmadv_ksm sys_pmadv_ksm diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 8f243e35a7b20..674cb940bd153 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -438,3 +438,4 @@ 448 o32 process_mrelease sys_process_mrelease 449 o32 futex_waitv sys_futex_waitv 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 o32 pmadv_ksm sys_pmadv_ksm diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index b131e6a773832..5cda07688f67a 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -2160,16 +2160,14 @@ static void build_r4000_tlb_load_handler(void) uasm_i_tlbr(&p); switch (current_cpu_type()) { - default: - if (cpu_has_mips_r2_exec_hazard) { - uasm_i_ehb(&p); - fallthrough; - case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: - break; - } + break; + default: + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); + break; } /* Examine entrylo 0 or 1 based on ptr. */ @@ -2236,15 +2234,14 @@ static void build_r4000_tlb_load_handler(void) uasm_i_tlbr(&p); switch (current_cpu_type()) { - default: - if (cpu_has_mips_r2_exec_hazard) { - uasm_i_ehb(&p); - case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: - break; - } + break; + default: + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); + break; } /* Examine entrylo 0 or 1 based on ptr. 
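
The two tlbex.c hunks above untangle a switch whose default: label fell through into case labels placed inside an if () block, a Duff's-device-like construct that newer compilers reject. The cleaned-up control flow has the conventional shape sketched here, with the enum values and the emit hook standing in for the kernel's cpu types and uasm_i_ehb():

    enum cpu_type { CAVIUM_OCTEON, CAVIUM_OCTEON_PLUS, CAVIUM_OCTEON2, CPU_OTHER };

    static int ehb_emitted;

    static void emit_ehb(void) /* uasm_i_ehb() stand-in */
    {
        ehb_emitted = 1;
    }

    static void maybe_emit_hazard_barrier(enum cpu_type cpu, int has_r2_exec_hazard)
    {
        switch (cpu) {
        case CAVIUM_OCTEON:
        case CAVIUM_OCTEON_PLUS:
        case CAVIUM_OCTEON2:
            break; /* Octeon never needs the barrier */
        default:
            if (has_r2_exec_hazard)
                emit_ehb();
            break;
        }
    }

    int main(void)
    {
        maybe_emit_hazard_barrier(CPU_OTHER, 1);
        return !ehb_emitted; /* exit 0 when the barrier was emitted */
    }
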
*/ diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 04684990e28ef..b7f6f782d9a13 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -301,11 +301,9 @@ static int __init plat_setup_devices(void) static int __init setup_kmac(char *s) { printk(KERN_INFO "korina mac = %s\n", s); - if (!mac_pton(s, korina_dev0_data.mac)) { + if (!mac_pton(s, korina_dev0_data.mac)) printk(KERN_ERR "Invalid mac\n"); - return -EINVAL; - } - return 0; + return 1; } __setup("kmac=", setup_kmac); diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index d65f55f67e19b..f72658b3a53f7 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # Objects to go into the VDSO. +# Sanitizer runtimes are unavailable and cannot be linked here. + KCSAN_SANITIZE := n + # Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before # the inclusion of generic Makefile. ARCH_REL_TYPE_ABS := R_MIPS_JUMP_SLOT|R_MIPS_GLOB_DAT diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index d4cbf069dc224..37a40981deb3b 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -70,9 +70,7 @@ static inline void set_fs(mm_segment_t fs) * versions are void (ie, don't return a value as such). */ -#define get_user __get_user \ - -#define __get_user(x, ptr) \ +#define get_user(x, ptr) \ ({ \ long __gu_err = 0; \ __get_user_check((x), (ptr), __gu_err); \ @@ -85,6 +83,14 @@ static inline void set_fs(mm_segment_t fs) (void)0; \ }) +#define __get_user(x, ptr) \ +({ \ + long __gu_err = 0; \ + const __typeof__(*(ptr)) __user *__p = (ptr); \ + __get_user_err((x), __p, (__gu_err)); \ + __gu_err; \ +}) + #define __get_user_check(x, ptr, err) \ ({ \ const __typeof__(*(ptr)) __user *__p = (ptr); \ @@ -165,12 +171,18 @@ do { \ : "r"(addr), "i"(-EFAULT) \ : "cc") -#define put_user __put_user \ +#define put_user(x, ptr) \ +({ \ + long __pu_err = 0; \ + __put_user_check((x), (ptr), __pu_err); \ + __pu_err; \ +}) #define __put_user(x, ptr) \ ({ \ long __pu_err = 0; \ - __put_user_err((x), (ptr), __pu_err); \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + __put_user_err((x), __p, __pu_err); \ __pu_err; \ }) diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index ba9340e96fd4c..ca9285a915efa 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -88,6 +88,7 @@ extern __must_check long strnlen_user(const char __user *s, long n); /* Optimized macros */ #define __get_user_asm(val, insn, addr, err) \ { \ + unsigned long __gu_val; \ __asm__ __volatile__( \ " movi %0, %3\n" \ "1: " insn " %1, 0(%2)\n" \ @@ -96,14 +97,20 @@ extern __must_check long strnlen_user(const char __user *s, long n); " .section __ex_table,\"a\"\n" \ " .word 1b, 2b\n" \ " .previous" \ - : "=&r" (err), "=r" (val) \ + : "=&r" (err), "=r" (__gu_val) \ : "r" (addr), "i" (-EFAULT)); \ + val = (__force __typeof__(*(addr)))__gu_val; \ } -#define __get_user_unknown(val, size, ptr, err) do { \ +extern void __get_user_unknown(void); + +#define __get_user_8(val, ptr, err) do { \ + u64 __val = 0; \ err = 0; \ - if (__copy_from_user(&(val), ptr, size)) { \ + if (raw_copy_from_user(&(__val), ptr, sizeof(val))) { \ err = -EFAULT; \ + } else { \ + val = (typeof(val))(typeof((val) - (val)))__val; \ } \ } while (0) @@ -119,8 +126,11 @@ do { \ case 4: \ __get_user_asm(val, "ldw", ptr, err); \ break; \ + case 8: \ + __get_user_8(val, ptr, err); \ + break; \ default: \ - 
__get_user_unknown(val, size, ptr, err); \ + __get_user_unknown(); \ break; \ } \ } while (0) @@ -129,9 +139,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val = 0; \ - __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\ - (x) = (__force __typeof__(x))__gu_val; \ + __get_user_common(x, sizeof(*(ptr)), __gu_ptr, __gu_err); \ __gu_err; \ }) @@ -139,11 +147,9 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val = 0; \ if (access_ok( __gu_ptr, sizeof(*__gu_ptr))) \ - __get_user_common(__gu_val, sizeof(*__gu_ptr), \ + __get_user_common(x, sizeof(*__gu_ptr), \ __gu_ptr, __gu_err); \ - (x) = (__force __typeof__(x))__gu_val; \ __gu_err; \ }) diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index 2009ae2d3c3bb..386e46443b605 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -36,10 +36,10 @@ struct rt_sigframe { static inline int rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw, - struct ucontext *uc, int *pr2) + struct ucontext __user *uc, int *pr2) { int temp; - unsigned long *gregs = uc->uc_mcontext.gregs; + unsigned long __user *gregs = uc->uc_mcontext.gregs; int err; /* Always make any pending restarted system calls return -EINTR */ @@ -102,10 +102,11 @@ asmlinkage int do_rt_sigreturn(struct switch_stack *sw) { struct pt_regs *regs = (struct pt_regs *)(sw + 1); /* Verify, can we follow the stack back */ - struct rt_sigframe *frame = (struct rt_sigframe *) regs->sp; + struct rt_sigframe __user *frame; sigset_t set; int rval; + frame = (struct rt_sigframe __user *) regs->sp; if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -124,10 +125,10 @@ asmlinkage int do_rt_sigreturn(struct switch_stack *sw) return 0; } -static inline int rt_setup_ucontext(struct ucontext *uc, struct pt_regs *regs) +static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *regs) { struct switch_stack *sw = (struct switch_stack *)regs - 1; - unsigned long *gregs = uc->uc_mcontext.gregs; + unsigned long __user *gregs = uc->uc_mcontext.gregs; int err = 0; err |= __put_user(MCONTEXT_VERSION, &uc->uc_mcontext.version); @@ -162,8 +163,9 @@ static inline int rt_setup_ucontext(struct ucontext *uc, struct pt_regs *regs) return err; } -static inline void *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, - size_t frame_size) +static inline void __user *get_sigframe(struct ksignal *ksig, + struct pt_regs *regs, + size_t frame_size) { unsigned long usp; @@ -174,13 +176,13 @@ static inline void *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, usp = sigsp(usp, ksig); /* Verify, is it 32 or 64 bit aligned */ - return (void *)((usp - frame_size) & -8UL); + return (void __user *)((usp - frame_size) & -8UL); } static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - struct rt_sigframe *frame; + struct rt_sigframe __user *frame; int err = 0; frame = get_sigframe(ksig, regs, sizeof(*frame)); diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h index 34619f010c631..0ccdb738a9a36 100644 --- a/arch/parisc/include/asm/traps.h +++ b/arch/parisc/include/asm/traps.h @@ -18,6 +18,7 @@ unsigned long parisc_acctyp(unsigned long code, unsigned int inst); const char *trap_name(unsigned long code); void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address); +int handle_nadtlb_fault(struct pt_regs *regs); #endif #endif 
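The nios2 __get_user() rework above leans on a classic kernel idiom:
unsupported access sizes are routed to a declared-but-never-defined
function, turning a bad sizeof into a link-time failure instead of a
silent runtime one. A standalone sketch of the idiom (hypothetical
names, compilable as plain C with optimisation enabled):

	/* Deliberately has no definition anywhere. */
	extern void __bad_access_size(void);

	#define fetch_val(dst, src)					\
	do {								\
		switch (sizeof(*(src))) {				\
		case 1: case 2: case 4: case 8:				\
			__builtin_memcpy(&(dst), (src), sizeof(*(src)));\
			break;						\
		default:						\
			/* Undefined reference: the build breaks here. */\
			__bad_access_size();				\
			break;						\
		}							\
	} while (0)

If every call site uses a supported size, the compiler discards the
default arm and the undefined symbol never reaches the linker.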
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 94150b91c96fb..bce71cefe5724 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -558,15 +558,6 @@ static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm, } } -static void flush_user_cache_tlb(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - flush_user_dcache_range_asm(start, end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(start, end); - flush_tlb_range(vma, start, end); -} - void flush_cache_mm(struct mm_struct *mm) { struct vm_area_struct *vma; @@ -581,17 +572,8 @@ void flush_cache_mm(struct mm_struct *mm) return; } - preempt_disable(); - if (mm->context == mfsp(3)) { - for (vma = mm->mmap; vma; vma = vma->vm_next) - flush_user_cache_tlb(vma, vma->vm_start, vma->vm_end); - preempt_enable(); - return; - } - for (vma = mm->mmap; vma; vma = vma->vm_next) flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end); - preempt_enable(); } void flush_cache_range(struct vm_area_struct *vma, @@ -605,15 +587,7 @@ void flush_cache_range(struct vm_area_struct *vma, return; } - preempt_disable(); - if (vma->vm_mm->context == mfsp(3)) { - flush_user_cache_tlb(vma, start, end); - preempt_enable(); - return; - } - - flush_cache_pages(vma, vma->vm_mm, vma->vm_start, vma->vm_end); - preempt_enable(); + flush_cache_pages(vma, vma->vm_mm, start, end); } void diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 68b46fe2f17c5..e19e8deb4c322 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -448,3 +448,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index b6fdebddc8e99..39576a9245c7f 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -662,6 +662,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) by hand. Technically we need to emulate: fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw */ + if (code == 17 && handle_nadtlb_fault(regs)) + return; fault_address = regs->ior; fault_space = regs->isr; break; diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index e9eabf8f14d7e..f114e102aaf21 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -425,3 +425,92 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, } pagefault_out_of_memory(); } + +/* Handle non-access data TLB miss faults. + * + * For probe instructions, accesses to userspace are considered allowed + * if they lie in a valid VMA and the access type matches. We are not + * allowed to handle MM faults here so there may be situations where an + * actual access would fail even though a probe was successful. 
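+ *
+ * Concrete illustration (not part of the change itself): a probe of a
+ * PROT_NONE mapping reports failure here even though a later
+ * mprotect() could make the access legal, while a successful probe
+ * only proves the VMA flags matched at this instant, not that the
+ * eventual access will be able to fault the page in.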
+ */ +int +handle_nadtlb_fault(struct pt_regs *regs) +{ + unsigned long insn = regs->iir; + int breg, treg, xreg, val = 0; + struct vm_area_struct *vma, *prev_vma; + struct task_struct *tsk; + struct mm_struct *mm; + unsigned long address; + unsigned long acc_type; + + switch (insn & 0x380) { + case 0x280: + /* FDC instruction */ + fallthrough; + case 0x380: + /* PDC and FIC instructions */ + if (printk_ratelimit()) { + pr_warn("BUG: nullifying cache flush/purge instruction\n"); + show_regs(regs); + } + if (insn & 0x20) { + /* Base modification */ + breg = (insn >> 21) & 0x1f; + xreg = (insn >> 16) & 0x1f; + if (breg && xreg) + regs->gr[breg] += regs->gr[xreg]; + } + regs->gr[0] |= PSW_N; + return 1; + + case 0x180: + /* PROBE instruction */ + treg = insn & 0x1f; + if (regs->isr) { + tsk = current; + mm = tsk->mm; + if (mm) { + /* Search for VMA */ + address = regs->ior; + mmap_read_lock(mm); + vma = find_vma_prev(mm, address, &prev_vma); + mmap_read_unlock(mm); + + /* + * Check if access to the VMA is okay. + * We don't allow for stack expansion. + */ + acc_type = (insn & 0x40) ? VM_WRITE : VM_READ; + if (vma + && address >= vma->vm_start + && (vma->vm_flags & acc_type) == acc_type) + val = 1; + } + } + if (treg) + regs->gr[treg] = val; + regs->gr[0] |= PSW_N; + return 1; + + case 0x300: + /* LPA instruction */ + if (insn & 0x20) { + /* Base modification */ + breg = (insn >> 21) & 0x1f; + xreg = (insn >> 16) & 0x1f; + if (breg && xreg) + regs->gr[breg] += regs->gr[xreg]; + } + treg = insn & 0x1f; + if (treg) + regs->gr[treg] = 0; + regs->gr[0] |= PSW_N; + return 1; + + default: + break; + } + + return 0; +} diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5f16ac1583c5d..887efa31f60ab 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -171,7 +171,7 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) endif -else +else ifdef CONFIG_PPC_BOOK3E_64 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts new file mode 100644 index 0000000000000..73f8c998c64df --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * T1040RDB-REV-A Device Tree Source + * + * Copyright 2014 - 2015 Freescale Semiconductor Inc. 
+ * + */ + +#include "t1040rdb.dts" + +/ { + model = "fsl,T1040RDB-REV-A"; + compatible = "fsl,T1040RDB-REV-A"; +}; + +&seville_port0 { + label = "ETH5"; +}; + +&seville_port2 { + label = "ETH7"; +}; + +&seville_port4 { + label = "ETH9"; +}; + +&seville_port6 { + label = "ETH11"; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index af0c8a6f56138..b6733e7e65805 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -119,7 +119,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_0>; phy-mode = "qsgmii"; - label = "ETH5"; + label = "ETH3"; status = "okay"; }; @@ -135,7 +135,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_2>; phy-mode = "qsgmii"; - label = "ETH7"; + label = "ETH5"; status = "okay"; }; @@ -151,7 +151,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_4>; phy-mode = "qsgmii"; - label = "ETH9"; + label = "ETH7"; status = "okay"; }; @@ -167,7 +167,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_6>; phy-mode = "qsgmii"; - label = "ETH11"; + label = "ETH9"; status = "okay"; }; diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index beba4979bff93..fee979d3a1aa4 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -359,25 +359,37 @@ static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) */ static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stbcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stbcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr) { - __asm__ __volatile__("sthcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + sthcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stwcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stwcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stdcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stdcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } @@ -389,7 +401,10 @@ static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr) static inline u8 __raw_rm_readb(volatile void __iomem *paddr) { u8 ret; - __asm__ __volatile__("lbzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lbzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -397,7 +412,10 @@ static inline u8 __raw_rm_readb(volatile void __iomem *paddr) static inline u16 __raw_rm_readw(volatile void __iomem *paddr) { u16 ret; - __asm__ __volatile__("lhzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lhzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -405,7 +423,10 @@ static inline u16 __raw_rm_readw(volatile void __iomem *paddr) static inline u32 __raw_rm_readl(volatile void __iomem *paddr) { u32 ret; - __asm__ __volatile__("lwzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lwzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -413,7 +434,10 @@ static 
inline u32 __raw_rm_readl(volatile void __iomem *paddr) static inline u64 __raw_rm_readq(volatile void __iomem *paddr) { u64 ret; - __asm__ __volatile__("ldcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + ldcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index b040094f79202..7ebc807aa8cc8 100644 --- a/arch/powerpc/include/asm/set_memory.h +++ b/arch/powerpc/include/asm/set_memory.h @@ -6,6 +6,8 @@ #define SET_MEMORY_RW 1 #define SET_MEMORY_NX 2 #define SET_MEMORY_X 3 +#define SET_MEMORY_NP 4 /* Set memory non present */ +#define SET_MEMORY_P 5 /* Set memory present */ int change_memory_attr(unsigned long addr, int numpages, long action); @@ -29,6 +31,14 @@ static inline int set_memory_x(unsigned long addr, int numpages) return change_memory_attr(addr, numpages, SET_MEMORY_X); } -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot); +static inline int set_memory_np(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_NP); +} + +static inline int set_memory_p(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_P); +} #endif diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 63316100080c1..4a35423f766db 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -125,8 +125,11 @@ do { \ */ #define __get_user_atomic_128_aligned(kaddr, uaddr, err) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine altivec\n" \ "1: lvx 0,0,%1 # get user\n" \ " stvx 0,0,%2 # put kernel\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 2600b4237292c..bb2f71a369415 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -530,3 +530,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cd0b8b71ecddc..384f58a3f373f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -582,8 +582,9 @@ void timer_rearm_host_dec(u64 now) local_paca->irq_happened |= PACA_IRQ_DEC; } else { now = *next_tb - now; - if (now <= decrementer_max) - set_dec_or_work(now); + if (now > decrementer_max) + now = decrementer_max; + set_dec_or_work(now); } } EXPORT_SYMBOL_GPL(timer_rearm_host_dec); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940bc..5a0f023a26e90 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. 
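+ *
+ * (Why not SPRG SCRATCH0, as before: exception entry typically saves
+ * r13 to SCRATCH0 as its very first step, so an NMI-class interrupt
+ * such as a machine check landing in this window would silently
+ * overwrite a value parked there. PACAR1 and the stack below r1
+ * survive.)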
*/ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again. EE is still off */ li r4, MSR_RI mtmsrd r4, 1 diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 84c89f08ae9aa..791db769080d2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -6137,8 +6137,11 @@ static int kvmppc_book3s_init_hv(void) if (r) return r; - if (kvmppc_radix_possible()) + if (kvmppc_radix_possible()) { r = kvmppc_radix_init(); + if (r) + return r; + } r = kvmppc_uvmem_init(); if (r < 0) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2ad0ccd202d5d..f0c4545dc3ab8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1499,7 +1499,7 @@ int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, { enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; while (vcpu->arch.mmio_vmx_copy_nums) { @@ -1596,7 +1596,7 @@ int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu, unsigned int index = rs & KVM_MMIO_REG_MASK; enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; vcpu->arch.io_gpr = rs; diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index bd3734d5be892..bf755a7be5147 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -112,9 +112,9 @@ static nokprobe_inline long address_ok(struct pt_regs *regs, { if (!user_mode(regs)) return 1; - if (__access_ok(ea, nb)) + if (access_ok((void __user *)ea, nb)) return 1; - if (__access_ok(ea, 1)) + if (access_ok((void __user *)ea, 1)) /* Access overlaps the end of the user region */ regs->dar = TASK_SIZE_MAX - 1; else @@ -1097,7 +1097,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ @@ -1110,7 +1113,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -3389,7 +3395,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op->val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "sthcx.", cr); break; #endif case 4: diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index eb8ecd7343a99..7ba6d3eff636d 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -567,18 +567,24 @@ 
NOKPROBE_SYMBOL(hash__do_page_fault); static void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); + const char *msg; /* kernel has accessed a bad area */ + if (regs->dar < PAGE_SIZE) + msg = "Kernel NULL pointer dereference"; + else + msg = "Unable to handle kernel data access"; + switch (TRAP(regs)) { case INTERRUPT_DATA_STORAGE: - case INTERRUPT_DATA_SEGMENT: case INTERRUPT_H_DATA_STORAGE: - pr_alert("BUG: %s on %s at 0x%08lx\n", - regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : - "Unable to handle kernel data access", + pr_alert("BUG: %s on %s at 0x%08lx\n", msg, is_write ? "write" : "read", regs->dar); break; + case INTERRUPT_DATA_SEGMENT: + pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar); + break; case INTERRUPT_INST_STORAGE: case INTERRUPT_INST_SEGMENT: pr_alert("BUG: Unable to handle kernel instruction fetch%s", diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index cf8770b1a692e..f3e4d069e0ba7 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -83,13 +83,12 @@ void __init kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte) { unsigned long k_cur; - phys_addr_t pa = __pa(kasan_early_shadow_page); for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_off_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) + if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page))) continue; __set_pte_at(&init_mm, k_cur, ptep, pte, 0); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9d5f710d2c205..b9b7fefbb64b9 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -956,7 +956,9 @@ static int __init parse_numa_properties(void) of_node_put(cpu); } - node_set_online(nid); + /* node_set_online() is an UB if 'nid' is negative */ + if (likely(nid >= 0)) + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index edea388e9d3fb..3bb9d168e3b31 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -48,6 +48,12 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) case SET_MEMORY_X: pte = pte_mkexec(pte); break; + case SET_MEMORY_NP: + pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0); + break; + case SET_MEMORY_P: + pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0); + break; default: WARN_ON_ONCE(1); break; @@ -96,36 +102,3 @@ int change_memory_attr(unsigned long addr, int numpages, long action) return apply_to_existing_page_range(&init_mm, start, size, change_page_attr, (void *)action); } - -/* - * Set the attributes of a page: - * - * This function is used by PPC32 at the end of init to set final kernel memory - * protection. It includes changing the maping of the page it is executing from - * and data pages it is using. 
- */ -static int set_page_attr(pte_t *ptep, unsigned long addr, void *data) -{ - pgprot_t prot = __pgprot((unsigned long)data); - - spin_lock(&init_mm.page_table_lock); - - set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot)); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - - spin_unlock(&init_mm.page_table_lock); - - return 0; -} - -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot) -{ - unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); - unsigned long sz = numpages * PAGE_SIZE; - - if (numpages <= 0) - return 0; - - return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr, - (void *)pgprot_val(prot)); -} diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 906e4e4328b2e..f71ededdc02a5 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -135,10 +135,12 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_sinittext)) + if (v_block_mapped((unsigned long)_sinittext)) { mmu_mark_initmem_nx(); - else - set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL); + } else { + set_memory_nx((unsigned long)_sinittext, numpages); + set_memory_rw((unsigned long)_sinittext, numpages); + } } #ifdef CONFIG_STRICT_KERNEL_RWX @@ -152,18 +154,14 @@ void mark_rodata_ro(void) return; } - numpages = PFN_UP((unsigned long)_etext) - - PFN_DOWN((unsigned long)_stext); - - set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX); /* - * mark .rodata as read only. Use __init_begin rather than __end_rodata - * to cover NOTES and EXCEPTION_TABLE. + * mark .text and .rodata as read only. Use __init_begin rather than + * __end_rodata to cover NOTES and EXCEPTION_TABLE. */ numpages = PFN_UP((unsigned long)__init_begin) - - PFN_DOWN((unsigned long)__start_rodata); + PFN_DOWN((unsigned long)_stext); - set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO); + set_memory_ro((unsigned long)_stext, numpages); // mark_initmem_nx() should have already run by now ptdump_check_wx(); @@ -179,8 +177,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) return; if (enable) - set_memory_attr(addr, numpages, PAGE_KERNEL); + set_memory_p(addr, numpages); else - set_memory_attr(addr, numpages, __pgprot(0)); + set_memory_np(addr, numpages); } #endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index e106909ff9c37..e7583fbcc8fa1 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1457,7 +1457,11 @@ static int trace_imc_event_init(struct perf_event *event) event->hw.idx = -1; - event->pmu->task_ctx_nr = perf_hw_context; + /* + * There can only be a single PMU for perf_hw_context events which is assigned to + * core PMU. Hence use "perf_sw_context" for trace_imc. 
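+ *
+ * (Background: perf_pmu_register() demotes, with a WARN, any PMU
+ * beyond the first that claims perf_hw_context, so an auxiliary PMU
+ * like trace_imc must register with perf_sw_context from the start.)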
+ */ + event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; return 0; } diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index f2ba837249d69..04a6abf14c295 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,6 +153,7 @@ int __init mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; + goto out; } ret = 0; diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index b4386714494a6..e3d44b36ae98f 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -43,7 +43,11 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) unsigned long parity; /* Calculate the parity of the value */ - asm ("popcntd %0,%1" : "=r" (parity) : "r" (val)); + asm (".machine push; \ + .machine power7; \ + popcntd %0,%1; \ + .machine pop;" + : "=r" (parity) : "r" (val)); /* xor our value with the previous mask */ val ^= rng->mask; diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 90c9d3531694b..4ba8245681192 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -78,6 +78,9 @@ int remove_phb_dynamic(struct pci_controller *phb) pseries_msi_free_domains(phb); + /* Keep a reference so phb isn't freed yet */ + get_device(&host_bridge->dev); + /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); @@ -101,6 +104,7 @@ int remove_phb_dynamic(struct pci_controller *phb) * the pcibios_free_controller_deferred() callback; * see pseries_root_bridge_prepare(). */ + put_device(&host_bridge->dev); return 0; } diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 8963eaffb1b7b..39186ad6b3c3a 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -86,7 +86,7 @@ static LIST_HEAD(gtms); */ struct gtm_timer *gtm_get_timer16(void) { - struct gtm *gtm = NULL; + struct gtm *gtm; int i; list_for_each_entry(gtm, >ms, list_node) { @@ -103,7 +103,7 @@ struct gtm_timer *gtm_get_timer16(void) spin_unlock_irq(>m->lock); } - if (gtm) + if (!list_empty(>ms)) return ERR_PTR(-EBUSY); return ERR_PTR(-ENODEV); } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 1ca5564bda9d0..89c86f32aff86 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1708,20 +1708,20 @@ __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift) static int __init xive_off(char *arg) { xive_cmdline_disabled = true; - return 0; + return 1; } __setup("xive=off", xive_off); static int __init xive_store_eoi_cmdline(char *arg) { if (!arg) - return -EINVAL; + return 1; if (strncmp(arg, "off", 3) == 0) { pr_info("StoreEOI disabled on kernel command line\n"); xive_store_eoi = false; } - return 0; + return 1; } __setup("xive.store-eoi=", xive_store_eoi_cmdline); diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts index 984872f3d3a9b..b9e30df127fef 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts @@ -203,6 +203,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git 
a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts index 7ba99b4da3042..8d23401b0bbb6 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts @@ -205,6 +205,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts index be9b12c9b374a..24fd83b43d9d5 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts @@ -213,6 +213,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts index 031c0c28f8195..25341f38292aa 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts @@ -178,6 +178,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/include/asm/module.lds.h b/arch/riscv/include/asm/module.lds.h index 4254ff2ff0494..1075beae1ac64 100644 --- a/arch/riscv/include/asm/module.lds.h +++ b/arch/riscv/include/asm/module.lds.h @@ -2,8 +2,8 @@ /* Copyright (C) 2017 Andes Technology Corporation */ #ifdef CONFIG_MODULE_SECTIONS SECTIONS { - .plt (NOLOAD) : { BYTE(0) } - .got (NOLOAD) : { BYTE(0) } - .got.plt (NOLOAD) : { BYTE(0) } + .plt : { BYTE(0) } + .got : { BYTE(0) } + .got.plt : { BYTE(0) } } #endif diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 60da0dcacf145..74d888c8d631a 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -11,11 +11,17 @@ #include #include +#ifdef CONFIG_KASAN +#define KASAN_STACK_ORDER 1 +#else +#define KASAN_STACK_ORDER 0 +#endif + /* thread information allocation */ #ifdef CONFIG_64BIT -#define THREAD_SIZE_ORDER (2) +#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #else -#define THREAD_SIZE_ORDER (1) +#define THREAD_SIZE_ORDER (1 + KASAN_STACK_ORDER) #endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index dae29cbfe550b..7f2ad008274f3 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -21,7 +21,7 @@ const struct cpu_operations cpu_ops_sbi; * be invoked from multiple threads in parallel. Define a per cpu data * to handle that. 
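 *
 * (The definition below is file-local, so marking it static also
 * silences sparse's "symbol 'boot_data' was not declared. Should it
 * be static?" warning; there is no functional change.)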
*/ -DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); +static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, unsigned long priv) diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 1fc075b8f764a..3348a61de7d99 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -15,8 +15,8 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, { struct stackframe buftail; unsigned long ra = 0; - unsigned long *user_frame_tail = - (unsigned long *)(fp - sizeof(struct stackframe)); + unsigned long __user *user_frame_tail = + (unsigned long __user *)(fp - sizeof(struct stackframe)); /* Check accessibility of one struct frame_tail beyond */ if (!access_ok(user_frame_tail, sizeof(buftail))) @@ -68,7 +68,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, static bool fill_callchain(void *entry, unsigned long pc) { - return perf_callchain_store(entry, pc); + return perf_callchain_store(entry, pc) == 0; } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 799147658dee2..1cd523748bd2e 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 448 common process_mrelease sys_process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm sys_pmadv_ksm diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2296b1ff1e023..4e3db4004bfdc 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3869,14 +3869,12 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) return 0; } -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod) +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { struct kvm_vcpu *vcpu; union tod_clock clk; unsigned long i; - mutex_lock(&kvm->lock); preempt_disable(); store_tod_clock_ext(&clk); @@ -3897,7 +3895,22 @@ void kvm_s390_set_tod_clock(struct kvm *kvm, kvm_s390_vcpu_unblock_all(kvm); preempt_enable(); +} + +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + mutex_lock(&kvm->lock); + __kvm_s390_set_tod_clock(kvm, gtod); + mutex_unlock(&kvm->lock); +} + +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + if (!mutex_trylock(&kvm->lock)) + return 0; + __kvm_s390_set_tod_clock(kvm, gtod); mutex_unlock(&kvm->lock); + return 1; } /** diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 098831e815e6c..f2c910763d7fa 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -349,8 +349,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod); +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); int 
kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 417154b314a64..6a765fe22eafc 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -102,7 +102,20 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_cond(vcpu, rc); VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod); - kvm_s390_set_tod_clock(vcpu->kvm, >od); + /* + * To set the TOD clock the kvm lock must be taken, but the vcpu lock + * is already held in handle_set_clock. The usual lock order is the + * opposite. As SCK is deprecated and should not be used in several + * cases, for example when the multiple epoch facility or TOD clock + * steering facility is installed (see Principles of Operation), a + * slow path can be used. If the lock can not be taken via try_lock, + * the instruction will be retried via -EAGAIN at a later point in + * time. + */ + if (!kvm_s390_try_set_tod_clock(vcpu->kvm, >od)) { + kvm_s390_retry_instr(vcpu); + return -EAGAIN; + } kvm_s390_set_psw_cc(vcpu, 0); return 0; diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 2de85c977f54f..cfc75fa43eae4 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index ffab16369beac..74f80443b195f 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -65,7 +65,7 @@ struct rt_signal_frame { */ static inline bool invalid_frame_pointer(void __user *fp, int fplen) { - if ((((unsigned long) fp) & 15) || !__access_ok((unsigned long)fp, fplen)) + if ((((unsigned long) fp) & 15) || !access_ok(fp, fplen)) return true; return false; diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 4398cc6fb68dd..d2c0a6426f6b8 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -496,3 +496,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 6ead1e2404576..8ca67a6926830 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -224,7 +224,7 @@ void mconsole_go(struct mc_request *req) void mconsole_stop(struct mc_request *req) { - deactivate_fd(req->originating_fd, MCONSOLE_IRQ); + block_signals(); os_set_fd_block(req->originating_fd, 1); mconsole_reply(req, "stopped", 0, 0); for (;;) { @@ -247,6 +247,7 @@ void mconsole_stop(struct mc_request *req) } os_set_fd_block(req->originating_fd, 0); mconsole_reply(req, "", 0, 0); + unblock_signals(); } static DEFINE_SPINLOCK(mc_devices_lock); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9f5bd41bf660c..e787b7fc75be9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -85,6 +85,7 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_NONLEAF_PMD_YOUNG select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_COPY_MC if X86_64 select ARCH_HAS_SET_MEMORY diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu 
index 542377cd419d7..22b919cdb6d19 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -157,7 +157,7 @@ config MPENTIUM4 config MK6 - bool "K6/K6-II/K6-III" + bool "AMD K6/K6-II/K6-III" depends on X86_32 help Select this for an AMD K6-family processor. Enables use of @@ -165,7 +165,7 @@ config MK6 flags to GCC. config MK7 - bool "Athlon/Duron/K7" + bool "AMD Athlon/Duron/K7" depends on X86_32 help Select this for an AMD Athlon K7-family processor. Enables use of @@ -173,12 +173,98 @@ config MK7 flags to GCC. config MK8 - bool "Opteron/Athlon64/Hammer/K8" + bool "AMD Opteron/Athlon64/Hammer/K8" help Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. +config MK8SSE3 + bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" + help + Select this for improved AMD Opteron or Athlon64 Hammer-family processors. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MK10 + bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" + help + Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, + Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MBARCELONA + bool "AMD Barcelona" + help + Select this for AMD Family 10h Barcelona processors. + + Enables -march=barcelona + +config MBOBCAT + bool "AMD Bobcat" + help + Select this for AMD Family 14h Bobcat processors. + + Enables -march=btver1 + +config MJAGUAR + bool "AMD Jaguar" + help + Select this for AMD Family 16h Jaguar processors. + + Enables -march=btver2 + +config MBULLDOZER + bool "AMD Bulldozer" + help + Select this for AMD Family 15h Bulldozer processors. + + Enables -march=bdver1 + +config MPILEDRIVER + bool "AMD Piledriver" + help + Select this for AMD Family 15h Piledriver processors. + + Enables -march=bdver2 + +config MSTEAMROLLER + bool "AMD Steamroller" + help + Select this for AMD Family 15h Steamroller processors. + + Enables -march=bdver3 + +config MEXCAVATOR + bool "AMD Excavator" + help + Select this for AMD Family 15h Excavator processors. + + Enables -march=bdver4 + +config MZEN + bool "AMD Zen" + help + Select this for AMD Family 17h Zen processors. + + Enables -march=znver1 + +config MZEN2 + bool "AMD Zen 2" + help + Select this for AMD Family 17h Zen 2 processors. + + Enables -march=znver2 + +config MZEN3 + bool "AMD Zen 3" + depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + help + Select this for AMD Family 19h Zen 3 processors. + + Enables -march=znver3 + config MCRUSOE bool "Crusoe" depends on X86_32 @@ -270,7 +356,7 @@ config MPSC in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. config MCORE2 - bool "Core 2/newer Xeon" + bool "Intel Core 2" help Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and @@ -278,6 +364,8 @@ config MCORE2 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) + Enables -march=core2 + config MATOM bool "Intel Atom" help @@ -287,6 +375,182 @@ config MATOM accordingly optimized code. Use a recent GCC with specific Atom support in order to fully benefit from selecting this option. +config MNEHALEM + bool "Intel Nehalem" + select X86_P6_NOP + help + + Select this for 1st Gen Core processors in the Nehalem family. 
+ + Enables -march=nehalem + +config MWESTMERE + bool "Intel Westmere" + select X86_P6_NOP + help + + Select this for the Intel Westmere formerly Nehalem-C family. + + Enables -march=westmere + +config MSILVERMONT + bool "Intel Silvermont" + select X86_P6_NOP + help + + Select this for the Intel Silvermont platform. + + Enables -march=silvermont + +config MGOLDMONT + bool "Intel Goldmont" + select X86_P6_NOP + help + + Select this for the Intel Goldmont platform including Apollo Lake and Denverton. + + Enables -march=goldmont + +config MGOLDMONTPLUS + bool "Intel Goldmont Plus" + select X86_P6_NOP + help + + Select this for the Intel Goldmont Plus platform including Gemini Lake. + + Enables -march=goldmont-plus + +config MSANDYBRIDGE + bool "Intel Sandy Bridge" + select X86_P6_NOP + help + + Select this for 2nd Gen Core processors in the Sandy Bridge family. + + Enables -march=sandybridge + +config MIVYBRIDGE + bool "Intel Ivy Bridge" + select X86_P6_NOP + help + + Select this for 3rd Gen Core processors in the Ivy Bridge family. + + Enables -march=ivybridge + +config MHASWELL + bool "Intel Haswell" + select X86_P6_NOP + help + + Select this for 4th Gen Core processors in the Haswell family. + + Enables -march=haswell + +config MBROADWELL + bool "Intel Broadwell" + select X86_P6_NOP + help + + Select this for 5th Gen Core processors in the Broadwell family. + + Enables -march=broadwell + +config MSKYLAKE + bool "Intel Skylake" + select X86_P6_NOP + help + + Select this for 6th Gen Core processors in the Skylake family. + + Enables -march=skylake + +config MSKYLAKEX + bool "Intel Skylake X" + select X86_P6_NOP + help + + Select this for 6th Gen Core processors in the Skylake X family. + + Enables -march=skylake-avx512 + +config MCANNONLAKE + bool "Intel Cannon Lake" + select X86_P6_NOP + help + + Select this for 8th Gen Core processors + + Enables -march=cannonlake + +config MICELAKE + bool "Intel Ice Lake" + select X86_P6_NOP + help + + Select this for 10th Gen Core processors in the Ice Lake family. + + Enables -march=icelake-client + +config MCASCADELAKE + bool "Intel Cascade Lake" + select X86_P6_NOP + help + + Select this for Xeon processors in the Cascade Lake family. + + Enables -march=cascadelake + +config MCOOPERLAKE + bool "Intel Cooper Lake" + depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) + select X86_P6_NOP + help + + Select this for Xeon processors in the Cooper Lake family. + + Enables -march=cooperlake + +config MTIGERLAKE + bool "Intel Tiger Lake" + depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) + select X86_P6_NOP + help + + Select this for third-generation 10 nm process processors in the Tiger Lake family. + + Enables -march=tigerlake + +config MSAPPHIRERAPIDS + bool "Intel Sapphire Rapids" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for third-generation 10 nm process processors in the Sapphire Rapids family. + + Enables -march=sapphirerapids + +config MROCKETLAKE + bool "Intel Rocket Lake" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for eleventh-generation processors in the Rocket Lake family. 
+ + Enables -march=rocketlake + +config MALDERLAKE + bool "Intel Alder Lake" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for twelfth-generation processors in the Alder Lake family. + + Enables -march=alderlake + config GENERIC_CPU bool "Generic-x86-64" depends on X86_64 @@ -294,6 +558,50 @@ config GENERIC_CPU Generic x86-64 CPU. Run equally well on all x86-64 CPUs. +config GENERIC_CPU2 + bool "Generic-x86-64-v2" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64 CPU. + Run equally well on all x86-64 CPUs with min support of x86-64-v2. + +config GENERIC_CPU3 + bool "Generic-x86-64-v3" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64-v3 CPU with v3 instructions. + Run equally well on all x86-64 CPUs with min support of x86-64-v3. + +config GENERIC_CPU4 + bool "Generic-x86-64-v4" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64 CPU with v4 instructions. + Run equally well on all x86-64 CPUs with min support of x86-64-v4. + +config MNATIVE_INTEL + bool "Intel-Native optimizations autodetected by the compiler" + help + + Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. Do NOT use this + for AMD CPUs. Intel Only! + + Enables -march=native + +config MNATIVE_AMD + bool "AMD-Native optimizations autodetected by the compiler" + help + + Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. Do NOT use this + for Intel CPUs. AMD Only! 
+ + Enables -march=native + endchoice config X86_GENERIC @@ -318,7 +626,7 @@ config X86_INTERNODE_CACHE_SHIFT config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD || X86_GENERIC || GENERIC_CPU || GENERIC_CPU2 || GENERIC_CPU3 || GENERIC_CPU4 default "4" if MELAN || M486SX || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX @@ -336,11 +644,11 @@ config X86_ALIGNMENT_16 config X86_INTEL_USERCOPY def_bool y - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL config X86_USE_PPRO_CHECKSUM def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD # # P6_NOPs are a relatively minor optimization that require a family >= @@ -356,26 +664,26 @@ config X86_USE_PPRO_CHECKSUM config X86_P6_NOP def_bool y depends on X86_64 - depends on (MCORE2 || MPENTIUM4 || MPSC) + depends on (MCORE2 || MPENTIUM4 || MPSC || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL) config X86_TSC def_bool y - depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII 
|| M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) || X86_64 config X86_CMPXCHG64 def_bool y - depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD # this should be set for all -march=.. options where the compiler # generates cmov. config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) default "5" if X86_32 && X86_CMPXCHG64 default "4" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index e84cdd409b646..7d3bbf060079c 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -131,8 +131,44 @@ else # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) cflags-$(CONFIG_MK8) += -march=k8 cflags-$(CONFIG_MPSC) += -march=nocona - cflags-$(CONFIG_MCORE2) += -march=core2 - 
cflags-$(CONFIG_MATOM) += -march=atom + cflags-$(CONFIG_MK8SSE3) += -march=k8-sse3 + cflags-$(CONFIG_MK10) += -march=amdfam10 + cflags-$(CONFIG_MBARCELONA) += -march=barcelona + cflags-$(CONFIG_MBOBCAT) += -march=btver1 + cflags-$(CONFIG_MJAGUAR) += -march=btver2 + cflags-$(CONFIG_MBULLDOZER) += -march=bdver1 + cflags-$(CONFIG_MPILEDRIVER) += -march=bdver2 -mno-tbm + cflags-$(CONFIG_MSTEAMROLLER) += -march=bdver3 -mno-tbm + cflags-$(CONFIG_MEXCAVATOR) += -march=bdver4 -mno-tbm + cflags-$(CONFIG_MZEN) += -march=znver1 + cflags-$(CONFIG_MZEN2) += -march=znver2 + cflags-$(CONFIG_MZEN3) += -march=znver3 + cflags-$(CONFIG_MNATIVE_INTEL) += -march=native + cflags-$(CONFIG_MNATIVE_AMD) += -march=native + cflags-$(CONFIG_MATOM) += -march=bonnell + cflags-$(CONFIG_MCORE2) += -march=core2 + cflags-$(CONFIG_MNEHALEM) += -march=nehalem + cflags-$(CONFIG_MWESTMERE) += -march=westmere + cflags-$(CONFIG_MSILVERMONT) += -march=silvermont + cflags-$(CONFIG_MGOLDMONT) += -march=goldmont + cflags-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus + cflags-$(CONFIG_MSANDYBRIDGE) += -march=sandybridge + cflags-$(CONFIG_MIVYBRIDGE) += -march=ivybridge + cflags-$(CONFIG_MHASWELL) += -march=haswell + cflags-$(CONFIG_MBROADWELL) += -march=broadwell + cflags-$(CONFIG_MSKYLAKE) += -march=skylake + cflags-$(CONFIG_MSKYLAKEX) += -march=skylake-avx512 + cflags-$(CONFIG_MCANNONLAKE) += -march=cannonlake + cflags-$(CONFIG_MICELAKE) += -march=icelake-client + cflags-$(CONFIG_MCASCADELAKE) += -march=cascadelake + cflags-$(CONFIG_MCOOPERLAKE) += -march=cooperlake + cflags-$(CONFIG_MTIGERLAKE) += -march=tigerlake + cflags-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids + cflags-$(CONFIG_MROCKETLAKE) += -march=rocketlake + cflags-$(CONFIG_MALDERLAKE) += -march=alderlake + cflags-$(CONFIG_GENERIC_CPU2) += -march=x86-64-v2 + cflags-$(CONFIG_GENERIC_CPU3) += -march=x86-64-v3 + cflags-$(CONFIG_GENERIC_CPU4) += -march=x86-64-v4 cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic KBUILD_CFLAGS += $(cflags-y) diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 71fae5a09e56d..2077ce7a56479 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -297,7 +297,7 @@ sub poly1305_iteration { $code.=<<___; mov \$1,%eax .Lno_key: - ret + RET ___ &end_function("poly1305_init_x86_64"); @@ -373,7 +373,7 @@ sub poly1305_iteration { .cfi_adjust_cfa_offset -48 .Lno_data: .Lblocks_epilogue: - ret + RET .cfi_endproc ___ &end_function("poly1305_blocks_x86_64"); @@ -399,7 +399,7 @@ sub poly1305_iteration { mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET ___ &end_function("poly1305_emit_x86_64"); if ($avx) { @@ -429,7 +429,7 @@ sub poly1305_iteration { &poly1305_iteration(); $code.=<<___; pop $ctx - ret + RET .size __poly1305_block,.-__poly1305_block .type __poly1305_init_avx,\@abi-omnipotent @@ -594,7 +594,7 @@ sub poly1305_iteration { lea -48-64($ctx),$ctx # size [de-]optimization pop %rbp - ret + RET .size __poly1305_init_avx,.-__poly1305_init_avx ___ @@ -747,7 +747,7 @@ sub poly1305_iteration { .cfi_restore %rbp .Lno_data_avx: .Lblocks_avx_epilogue: - ret + RET .cfi_endproc .align 32 @@ -1452,7 +1452,7 @@ sub poly1305_iteration { ___ $code.=<<___; vzeroupper - ret + RET .cfi_endproc ___ &end_function("poly1305_blocks_avx"); @@ -1508,7 +1508,7 @@ sub poly1305_iteration { mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET ___ &end_function("poly1305_emit_avx"); @@ -1675,7 +1675,7 @@ sub poly1305_blocks_avxN { 
.cfi_restore %rbp .Lno_data_avx2$suffix: .Lblocks_avx2_epilogue$suffix: - ret + RET .cfi_endproc .align 32 @@ -2201,7 +2201,7 @@ sub poly1305_blocks_avxN { ___ $code.=<<___; vzeroupper - ret + RET .cfi_endproc ___ if($avx > 2 && $avx512) { @@ -2792,7 +2792,7 @@ sub poly1305_blocks_avxN { .cfi_def_cfa_register %rsp ___ $code.=<<___; - ret + RET .cfi_endproc ___ @@ -2893,7 +2893,7 @@ sub poly1305_blocks_avxN { ___ $code.=<<___; mov \$1,%eax - ret + RET .size poly1305_init_base2_44,.-poly1305_init_base2_44 ___ { @@ -3010,7 +3010,7 @@ sub poly1305_blocks_avxN { jnz .Lblocks_vpmadd52_4x .Lno_data_vpmadd52: - ret + RET .size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52 ___ } @@ -3451,7 +3451,7 @@ sub poly1305_blocks_avxN { vzeroall .Lno_data_vpmadd52_4x: - ret + RET .size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x ___ } @@ -3824,7 +3824,7 @@ sub poly1305_blocks_avxN { vzeroall .Lno_data_vpmadd52_8x: - ret + RET .size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x ___ } @@ -3861,7 +3861,7 @@ sub poly1305_blocks_avxN { mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET .size poly1305_emit_base2_44,.-poly1305_emit_base2_44 ___ } } } @@ -3916,7 +3916,7 @@ sub poly1305_blocks_avxN { .Ldone_enc: mov $otp,%rax - ret + RET .size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad .globl xor128_decrypt_n_pad @@ -3967,7 +3967,7 @@ sub poly1305_blocks_avxN { .Ldone_dec: mov $otp,%rax - ret + RET .size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad ___ } @@ -4109,7 +4109,7 @@ sub poly1305_blocks_avxN { pop %rbx pop %rdi pop %rsi - ret + RET .size avx_handler,.-avx_handler .section .pdata diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 320480a8db4f8..331aaf1a782ff 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -455,3 +455,4 @@ 448 i386 process_mrelease sys_process_mrelease 449 i386 futex_waitv sys_futex_waitv 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node +451 i386 pmadv_ksm sys_pmadv_ksm diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index c84d12608cd2d..14902db4c01fc 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -372,6 +372,7 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 2d33bba9a1440..215aed65e9782 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -472,7 +472,7 @@ static u64 pt_config_filters(struct perf_event *event) pt->filters.filter[range].msr_b = filter->msr_b; } - rtit_ctl |= filter->config << pt_address_ranges[range].reg_off; + rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off; } return rtit_ctl; diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 8a9432fb3802b..f973788f6b217 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -819,7 +819,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) static inline int pmd_bad(pmd_t pmd) { - return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; + return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != + (_KERNPG_TABLE & ~_PAGE_ACCESSED); } static inline unsigned long pages_to_mb(unsigned long npg) @@ 
-1423,10 +1424,10 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } -#define arch_faults_on_old_pte arch_faults_on_old_pte -static inline bool arch_faults_on_old_pte(void) +#define arch_has_hw_pte_young arch_has_hw_pte_young +static inline bool arch_has_hw_pte_young(void) { - return false; + return true; } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index bb2fb78523cee..ab572d8def2b7 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -222,17 +222,19 @@ struct __attribute__ ((__packed__)) vmcb_control_area { /* AVIC */ -#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) +#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFFULL) #define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) -#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) +#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) #define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) #define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) -#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFF) +#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFFULL) -#define AVIC_DOORBELL_PHYSICAL_ID_MASK (0xFF) +#define AVIC_DOORBELL_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) + +#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL #define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 #define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h index 75884d2cdec37..4e6a08d4c7e53 100644 --- a/arch/x86/include/asm/vermagic.h +++ b/arch/x86/include/asm/vermagic.h @@ -17,6 +17,48 @@ #define MODULE_PROC_FAMILY "586MMX " #elif defined CONFIG_MCORE2 #define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MNATIVE_INTEL +#define MODULE_PROC_FAMILY "NATIVE_INTEL " +#elif defined CONFIG_MNATIVE_AMD +#define MODULE_PROC_FAMILY "NATIVE_AMD " +#elif defined CONFIG_MNEHALEM +#define MODULE_PROC_FAMILY "NEHALEM " +#elif defined CONFIG_MWESTMERE +#define MODULE_PROC_FAMILY "WESTMERE " +#elif defined CONFIG_MSILVERMONT +#define MODULE_PROC_FAMILY "SILVERMONT " +#elif defined CONFIG_MGOLDMONT +#define MODULE_PROC_FAMILY "GOLDMONT " +#elif defined CONFIG_MGOLDMONTPLUS +#define MODULE_PROC_FAMILY "GOLDMONTPLUS " +#elif defined CONFIG_MSANDYBRIDGE +#define MODULE_PROC_FAMILY "SANDYBRIDGE " +#elif defined CONFIG_MIVYBRIDGE +#define MODULE_PROC_FAMILY "IVYBRIDGE " +#elif defined CONFIG_MHASWELL +#define MODULE_PROC_FAMILY "HASWELL " +#elif defined CONFIG_MBROADWELL +#define MODULE_PROC_FAMILY "BROADWELL " +#elif defined CONFIG_MSKYLAKE +#define MODULE_PROC_FAMILY "SKYLAKE " +#elif defined CONFIG_MSKYLAKEX +#define MODULE_PROC_FAMILY "SKYLAKEX " +#elif defined CONFIG_MCANNONLAKE +#define MODULE_PROC_FAMILY "CANNONLAKE " +#elif defined CONFIG_MICELAKE +#define MODULE_PROC_FAMILY "ICELAKE " +#elif defined CONFIG_MCASCADELAKE +#define MODULE_PROC_FAMILY "CASCADELAKE " +#elif defined CONFIG_MCOOPERLAKE +#define MODULE_PROC_FAMILY "COOPERLAKE " +#elif defined CONFIG_MTIGERLAKE +#define MODULE_PROC_FAMILY "TIGERLAKE " +#elif defined CONFIG_MSAPPHIRERAPIDS +#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS " +#elif defined CONFIG_MROCKETLAKE +#define MODULE_PROC_FAMILY "ROCKETLAKE " +#elif defined CONFIG_MALDERLAKE +#define MODULE_PROC_FAMILY "ALDERLAKE " #elif defined CONFIG_MATOM #define MODULE_PROC_FAMILY "ATOM " #elif defined CONFIG_M686 @@ -35,6 +77,30 @@ #define
MODULE_PROC_FAMILY "K7 " #elif defined CONFIG_MK8 #define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_MK8SSE3 +#define MODULE_PROC_FAMILY "K8SSE3 " +#elif defined CONFIG_MK10 +#define MODULE_PROC_FAMILY "K10 " +#elif defined CONFIG_MBARCELONA +#define MODULE_PROC_FAMILY "BARCELONA " +#elif defined CONFIG_MBOBCAT +#define MODULE_PROC_FAMILY "BOBCAT " +#elif defined CONFIG_MBULLDOZER +#define MODULE_PROC_FAMILY "BULLDOZER " +#elif defined CONFIG_MPILEDRIVER +#define MODULE_PROC_FAMILY "PILEDRIVER " +#elif defined CONFIG_MSTEAMROLLER +#define MODULE_PROC_FAMILY "STEAMROLLER " +#elif defined CONFIG_MJAGUAR +#define MODULE_PROC_FAMILY "JAGUAR " +#elif defined CONFIG_MEXCAVATOR +#define MODULE_PROC_FAMILY "EXCAVATOR " +#elif defined CONFIG_MZEN +#define MODULE_PROC_FAMILY "ZEN " +#elif defined CONFIG_MZEN2 +#define MODULE_PROC_FAMILY "ZEN2 " +#elif defined CONFIG_MZEN3 +#define MODULE_PROC_FAMILY "ZEN3 " #elif defined CONFIG_MELAN #define MODULE_PROC_FAMILY "ELAN " #elif defined CONFIG_MCRUSOE diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 5b6d1a95776f0..0d01e7f5078c2 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1328,6 +1328,17 @@ static int __init disable_acpi_pci(const struct dmi_system_id *d) return 0; } +static int __init disable_acpi_xsdt(const struct dmi_system_id *d) +{ + if (!acpi_force) { + pr_notice("%s detected: force use of acpi=rsdt\n", d->ident); + acpi_gbl_do_not_use_xsdt = TRUE; + } else { + pr_notice("Warning: DMI blacklist says broken, but acpi XSDT forced\n"); + } + return 0; +} + static int __init dmi_disable_acpi(const struct dmi_system_id *d) { if (!acpi_force) { @@ -1451,6 +1462,19 @@ static const struct dmi_system_id acpi_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), }, }, + /* + * Boxes that need ACPI XSDT use disabled due to corrupted tables + */ + { + .callback = disable_acpi_xsdt, + .ident = "Advantech DAC-BJ01", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Bearlake CRB Board"), + DMI_MATCH(DMI_BIOS_VERSION, "V1.12"), + DMI_MATCH(DMI_BIOS_DATE, "02/01/2011"), + }, + }, {} }; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 7c7824ae78622..dc6d5e98d2963 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1639,7 +1639,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest) perm = guest ? &fpu->guest_perm : &fpu->perm; /* Pairs with the READ_ONCE() in xstate_get_group_perm() */ - WRITE_ONCE(perm->__state_perm, requested); + WRITE_ONCE(perm->__state_perm, mask); /* Protected by sighand lock */ perm->__state_size = ksize; perm->__user_state_size = usize; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d77481ecb0d5f..ed8a13ac4ab23 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -517,7 +517,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector) } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) { ipi_bitmap <<= min - apic_id; min = apic_id; - } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) { + } else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) { max = apic_id < max ? 
max : apic_id; } else { ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e86d610dc6b7a..02d061a06aa19 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1623,11 +1623,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, goto exception; } - if (!seg_desc.p) { - err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; - goto exception; - } - dpl = seg_desc.dpl; switch (seg) { @@ -1667,6 +1662,10 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, case VCPU_SREG_TR: if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) goto exception; + if (!seg_desc.p) { + err_vec = NP_VECTOR; + goto exception; + } old_desc = seg_desc; seg_desc.type |= 2; /* busy */ ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, @@ -1691,6 +1690,11 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, break; } + if (!seg_desc.p) { + err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; + goto exception; + } + if (seg_desc.s) { /* mark segment as accessed */ if (!(seg_desc.type & 1)) { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6e38a7d22e97a..10bc257d3803b 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -236,7 +236,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic); int ret; - if (!synic->active && !host) + if (!synic->active && (!host || data)) return 1; trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); @@ -282,6 +282,9 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, case HV_X64_MSR_EOM: { int i; + if (!synic->active) + break; + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) kvm_hv_notify_acked_sint(vcpu, i); break; @@ -446,6 +449,9 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) struct kvm_lapic_irq irq; int ret, vector; + if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm)) + return -EINVAL; + if (sint >= ARRAY_SIZE(synic->sint)) return -EINVAL; @@ -658,7 +664,7 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu); - if (!synic->active && !host) + if (!synic->active && (!host || config)) return 1; if (unlikely(!host && hv_vcpu->enforce_cpuid && new_config.direct_mode && @@ -687,7 +693,7 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer); struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu); - if (!synic->active && !host) + if (!synic->active && (!host || count)) return 1; trace_kvm_hv_stimer_set_count(hv_stimer_to_vcpu(stimer)->vcpu_id, @@ -1750,7 +1756,7 @@ struct kvm_hv_hcall { sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS]; }; -static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex) +static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { int i; gpa_t gpa; @@ -1765,7 +1771,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool int sparse_banks_len; bool all_cpus; - if (!ex) { + if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST || + hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE) { if (hc->fast) { flush.address_space = hc->ingpa; flush.flags = hc->outgpa; @@ -1819,7 +1826,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool if (!all_cpus) { if (hc->fast) { - if (sparse_banks_len > 
HV_HYPERCALL_MAX_XMM_REGISTERS - 1) + /* XMM0 is already consumed, each XMM holds two sparse banks. */ + if (sparse_banks_len > 2 * (HV_HYPERCALL_MAX_XMM_REGISTERS - 1)) return HV_STATUS_INVALID_HYPERCALL_INPUT; for (i = 0; i < sparse_banks_len; i += 2) { sparse_banks[i] = sse128_lo(hc->xmm[i / 2 + 1]); @@ -1875,7 +1883,7 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, } } -static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex) +static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { struct kvm *kvm = vcpu->kvm; struct hv_send_ipi_ex send_ipi_ex; @@ -1888,8 +1896,9 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool int sparse_banks_len; u32 vector; bool all_cpus; + int i; - if (!ex) { + if (hc->code == HVCALL_SEND_IPI) { if (!hc->fast) { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi, sizeof(send_ipi)))) @@ -1908,9 +1917,15 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool trace_kvm_hv_send_ipi(vector, sparse_banks[0]); } else { - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex, - sizeof(send_ipi_ex)))) - return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (!hc->fast) { + if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex, + sizeof(send_ipi_ex)))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } else { + send_ipi_ex.vector = (u32)hc->ingpa; + send_ipi_ex.vp_set.format = hc->outgpa; + send_ipi_ex.vp_set.valid_bank_mask = sse128_lo(hc->xmm[0]); + } trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector, send_ipi_ex.vp_set.format, @@ -1918,8 +1933,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool vector = send_ipi_ex.vector; valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; - sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * - sizeof(sparse_banks[0]); + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64); all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; @@ -1929,12 +1943,27 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool if (!sparse_banks_len) goto ret_success; - if (kvm_read_guest(kvm, - hc->ingpa + offsetof(struct hv_send_ipi_ex, - vp_set.bank_contents), - sparse_banks, - sparse_banks_len)) - return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (!hc->fast) { + if (kvm_read_guest(kvm, + hc->ingpa + offsetof(struct hv_send_ipi_ex, + vp_set.bank_contents), + sparse_banks, + sparse_banks_len * sizeof(sparse_banks[0]))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } else { + /* + * The lower half of XMM0 is already consumed, each XMM holds + * two sparse banks.
+ */ + if (sparse_banks_len > (2 * HV_HYPERCALL_MAX_XMM_REGISTERS - 1)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + for (i = 0; i < sparse_banks_len; i++) { + if (i % 2) + sparse_banks[i] = sse128_lo(hc->xmm[(i + 1) / 2]); + else + sparse_banks[i] = sse128_hi(hc->xmm[i / 2]); + } + } } check_and_send_ipi: @@ -2096,6 +2125,7 @@ static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc) case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: + case HVCALL_SEND_IPI_EX: return true; } @@ -2247,46 +2277,28 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) kvm_hv_hypercall_complete_userspace; return 0; case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: - if (unlikely(!hc.rep_cnt || hc.rep_idx)) { - ret = HV_STATUS_INVALID_HYPERCALL_INPUT; - break; - } - ret = kvm_hv_flush_tlb(vcpu, &hc, false); - break; - case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: - if (unlikely(hc.rep)) { - ret = HV_STATUS_INVALID_HYPERCALL_INPUT; - break; - } - ret = kvm_hv_flush_tlb(vcpu, &hc, false); - break; case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: if (unlikely(!hc.rep_cnt || hc.rep_idx)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } - ret = kvm_hv_flush_tlb(vcpu, &hc, true); + ret = kvm_hv_flush_tlb(vcpu, &hc); break; + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: if (unlikely(hc.rep)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } - ret = kvm_hv_flush_tlb(vcpu, &hc, true); + ret = kvm_hv_flush_tlb(vcpu, &hc); break; case HVCALL_SEND_IPI: - if (unlikely(hc.rep)) { - ret = HV_STATUS_INVALID_HYPERCALL_INPUT; - break; - } - ret = kvm_hv_send_ipi(vcpu, &hc, false); - break; case HVCALL_SEND_IPI_EX: - if (unlikely(hc.fast || hc.rep)) { + if (unlikely(hc.rep)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } - ret = kvm_hv_send_ipi(vcpu, &hc, true); + ret = kvm_hv_send_ipi(vcpu, &hc); break; case HVCALL_POST_DEBUG_DATA: case HVCALL_RETRIEVE_DEBUG_DATA: diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9322e6340a742..2a10d0033c964 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -992,6 +992,10 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = -1; if (irq->shorthand == APIC_DEST_SELF) { + if (KVM_BUG_ON(!src, kvm)) { + *r = 0; + return true; + } *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); return true; } @@ -2242,10 +2246,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) { - struct kvm_lapic *apic = vcpu->arch.apic; - - apic_set_tpr(apic, ((cr8 & 0x0f) << 4) - | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4)); + apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4); } u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e9fbb2c8bbe2d..c7070973f0de1 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -48,6 +48,7 @@ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE) #define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP) +#define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX) static __always_inline u64 rsvd_bits(int s, int e) { diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 5b5bdac97c7b9..3821d5140ea31 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -34,9 +34,8 @@ #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgq" #else - #define CMPXCHG cmpxchg64 
#define PT_MAX_FULL_LEVELS 2 #endif #elif PTTYPE == 32 @@ -52,7 +51,7 @@ #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgl" #elif PTTYPE == PTTYPE_EPT #define pt_element_t u64 #define guest_walker guest_walkerEPT @@ -65,7 +64,9 @@ #define PT_GUEST_DIRTY_SHIFT 9 #define PT_GUEST_ACCESSED_SHIFT 8 #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) - #define CMPXCHG cmpxchg64 + #ifdef CONFIG_X86_64 + #define CMPXCHG "cmpxchgq" + #endif #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #else #error Invalid PTTYPE value @@ -147,43 +148,36 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t __user *ptep_user, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) { - int npages; - pt_element_t ret; - pt_element_t *table; - struct page *page; - - npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); - if (likely(npages == 1)) { - table = kmap_atomic(page); - ret = CMPXCHG(&table[index], orig_pte, new_pte); - kunmap_atomic(table); - - kvm_release_page_dirty(page); - } else { - struct vm_area_struct *vma; - unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; - unsigned long pfn; - unsigned long paddr; - - mmap_read_lock(current->mm); - vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); - if (!vma || !(vma->vm_flags & VM_PFNMAP)) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - paddr = pfn << PAGE_SHIFT; - table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); - if (!table) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - ret = CMPXCHG(&table[index], orig_pte, new_pte); - memunmap(table); - mmap_read_unlock(current->mm); - } + signed char r; - return (ret != orig_pte); + if (!user_access_begin(ptep_user, sizeof(pt_element_t))) + return -EFAULT; + +#ifdef CMPXCHG + asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) + : [ptr] "+m" (*ptep_user), + [old] "+a" (orig_pte), + [r] "=q" (r) + : [new] "r" (new_pte) + : "memory"); +#else + asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) + : [ptr] "+m" (*ptep_user), + [old] "+A" (orig_pte), + [r] "=q" (r) + : [new_lo] "b" ((u32)new_pte), + [new_hi] "c" ((u32)(new_pte >> 32)) + : "memory"); +#endif + + user_access_end(); + return r; } static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index bc9e3553fba2d..d2e69b2ddbbee 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -99,15 +99,18 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, } /* - * Finds the next valid root after root (or the first valid root if root - * is NULL), takes a reference on it, and returns that next root. If root - * is not NULL, this thread should have already taken a reference on it, and - * that reference will be dropped. If no valid root is found, this - * function will return NULL. + * Returns the next root after @prev_root (or the first root if @prev_root is + * NULL). A reference to the returned root is acquired, and the reference to + * @prev_root is released (the caller obviously must hold a reference to + * @prev_root if it's non-NULL).
+ * + * If @only_valid is true, invalid roots are skipped. + * + * Returns NULL if the end of tdp_mmu_roots was reached. */ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, struct kvm_mmu_page *prev_root, - bool shared) + bool shared, bool only_valid) { struct kvm_mmu_page *next_root; @@ -121,9 +124,14 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, next_root = list_first_or_null_rcu(&kvm->arch.tdp_mmu_roots, typeof(*next_root), link); - while (next_root && !kvm_tdp_mmu_get_root(kvm, next_root)) + while (next_root) { + if ((!only_valid || !next_root->role.invalid) && + kvm_tdp_mmu_get_root(kvm, next_root)) + break; + next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots, &next_root->link, typeof(*next_root), link); + } rcu_read_unlock(); @@ -143,13 +151,19 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, * mode. In the unlikely event that this thread must free a root, the lock * will be temporarily dropped and reacquired in write mode. */ -#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \ - for (_root = tdp_mmu_next_root(_kvm, NULL, _shared); \ - _root; \ - _root = tdp_mmu_next_root(_kvm, _root, _shared)) \ - if (kvm_mmu_page_as_id(_root) != _as_id) { \ +#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, _only_valid)\ + for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, _only_valid); \ + _root; \ + _root = tdp_mmu_next_root(_kvm, _root, _shared, _only_valid)) \ + if (kvm_mmu_page_as_id(_root) != _as_id) { \ } else +#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \ + __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true) + +#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \ + __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, false) + #define for_each_tdp_mmu_root(_kvm, _root, _as_id) \ list_for_each_entry_rcu(_root, &_kvm->arch.tdp_mmu_roots, link, \ lockdep_is_held_type(&kvm->mmu_lock, 0) || \ @@ -200,7 +214,10 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level); - /* Check for an existing root before allocating a new one. */ + /* + * Check for an existing root before allocating a new one. Note, the + * role check prevents consuming an invalid root.
+ */ for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) { if (root->role.word == role.word && kvm_tdp_mmu_get_root(kvm, root)) @@ -1032,13 +1049,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool flush) { - struct kvm_mmu_page *root; - - for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false) - flush = zap_gfn_range(kvm, root, range->start, range->end, - range->may_block, flush, false); - - return flush; + return __kvm_tdp_mmu_zap_gfn_range(kvm, range->slot->as_id, range->start, + range->end, range->may_block, flush); } typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter, @@ -1221,7 +1233,7 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, lockdep_assert_held_read(&kvm->mmu_lock); - for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) + for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages, min_level); @@ -1249,6 +1261,9 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true)) continue; + if (!is_shadow_present_pte(iter.old_spte)) + continue; + if (spte_ad_need_write_protect(iter.old_spte)) { if (is_writable_pte(iter.old_spte)) new_spte = iter.old_spte & ~PT_WRITABLE_MASK; @@ -1291,7 +1306,7 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, lockdep_assert_held_read(&kvm->mmu_lock); - for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) + for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); @@ -1416,7 +1431,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, lockdep_assert_held_read(&kvm->mmu_lock); - for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) + for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) zap_collapsible_spte_range(kvm, root, slot); } diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 3899004a5d91e..08c917511fedd 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -10,9 +10,6 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu); __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm *kvm, struct kvm_mmu_page *root) { - if (root->role.invalid) - return false; - return refcount_inc_not_zero(&root->tdp_mmu_root_count); } diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index fb3e207913388..7ef229011db8a 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -783,7 +783,7 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; - int idx, ret = -EINVAL; + int idx, ret = 0; if (!kvm_arch_has_assigned_device(kvm) || !irq_remapping_cap(IRQ_POSTING_CAP)) @@ -794,7 +794,13 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - WARN_ON(guest_irq >= irq_rt->nr_rt_entries); + + if (guest_irq >= irq_rt->nr_rt_entries || + hlist_empty(&irq_rt->map[guest_irq])) { + pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n", + guest_irq, irq_rt->nr_rt_entries); + goto out; + } hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { struct vcpu_data vcpu_info; @@ -927,17 +933,12 @@ 
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { u64 entry; - /* ID = 0xff (broadcast), ID > 0xff (reserved) */ int h_physical_id = kvm_cpu_get_apicid(cpu); struct vcpu_svm *svm = to_svm(vcpu); lockdep_assert_preemption_disabled(); - /* - * Since the host physical APIC id is 8 bits, - * we can support host APIC ID upto 255. - */ - if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) + if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) return; /* diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 17b53457d8664..fef9758525826 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2358,7 +2358,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } -static bool sev_es_validate_vmgexit(struct vcpu_svm *svm) +static int sev_es_validate_vmgexit(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu; struct ghcb *ghcb; @@ -2463,7 +2463,7 @@ static bool sev_es_validate_vmgexit(struct vcpu_svm *svm) goto vmgexit_err; } - return true; + return 0; vmgexit_err: vcpu = &svm->vcpu; @@ -2486,7 +2486,8 @@ static bool sev_es_validate_vmgexit(struct vcpu_svm *svm) ghcb_set_sw_exit_info_1(ghcb, 2); ghcb_set_sw_exit_info_2(ghcb, reason); - return false; + /* Resume the guest to "return" the error code. */ + return 1; } void sev_es_unmap_ghcb(struct vcpu_svm *svm) @@ -2545,7 +2546,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) } #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) -static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) +static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) { struct vmcb_control_area *control = &svm->vmcb->control; struct ghcb *ghcb = svm->sev_es.ghcb; @@ -2598,14 +2599,14 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) } scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT); if (!scratch_va) - goto e_scratch; + return -ENOMEM; if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { /* Unable to copy scratch area from guest */ pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); kvfree(scratch_va); - goto e_scratch; + return -EFAULT; } /* @@ -2621,13 +2622,13 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) svm->sev_es.ghcb_sa = scratch_va; svm->sev_es.ghcb_sa_len = len; - return true; + return 0; e_scratch: ghcb_set_sw_exit_info_1(ghcb, 2); ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA); - return false; + return 1; } static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, @@ -2765,17 +2766,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) exit_code = ghcb_get_sw_exit_code(ghcb); - if (!sev_es_validate_vmgexit(svm)) - return 1; + ret = sev_es_validate_vmgexit(svm); + if (ret) + return ret; sev_es_sync_from_ghcb(svm); ghcb_set_sw_exit_info_1(ghcb, 0); ghcb_set_sw_exit_info_2(ghcb, 0); - ret = 1; switch (exit_code) { case SVM_VMGEXIT_MMIO_READ: - if (!setup_vmgexit_scratch(svm, true, control->exit_info_2)) + ret = setup_vmgexit_scratch(svm, true, control->exit_info_2); + if (ret) break; ret = kvm_sev_es_mmio_read(vcpu, @@ -2784,7 +2786,8 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) svm->sev_es.ghcb_sa); break; case SVM_VMGEXIT_MMIO_WRITE: - if (!setup_vmgexit_scratch(svm, false, control->exit_info_2)) + ret = setup_vmgexit_scratch(svm, false, control->exit_info_2); + if (ret) break; ret = 
kvm_sev_es_mmio_write(vcpu, @@ -2817,6 +2820,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT); } + ret = 1; break; } case SVM_VMGEXIT_UNSUPPORTED_EVENT: @@ -2836,6 +2840,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) { int count; int bytes; + int r; if (svm->vmcb->control.exit_info_2 > INT_MAX) return -EINVAL; @@ -2844,8 +2849,9 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) if (unlikely(check_mul_overflow(count, size, &bytes))) return -EINVAL; - if (!setup_vmgexit_scratch(svm, in, bytes)) - return 1; + r = setup_vmgexit_scratch(svm, in, bytes); + if (r) + return r; return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa, count, in); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eb4029660bd9f..9b6166348c94f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1656,8 +1656,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return r; } - /* Update reserved bits */ - if ((efer ^ old_efer) & EFER_NX) + if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS) kvm_mmu_reset_context(vcpu); return 0; } diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c index df50451d94ef7..3e2f33fc33de2 100644 --- a/arch/x86/lib/iomem.c +++ b/arch/x86/lib/iomem.c @@ -22,7 +22,7 @@ static __always_inline void rep_movs(void *to, const void *from, size_t n) : "memory"); } -void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) +static void string_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) { if (unlikely(!n)) return; @@ -38,9 +38,8 @@ void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) } rep_movs(to, (const void *)from, n); } -EXPORT_SYMBOL(memcpy_fromio); -void memcpy_toio(volatile void __iomem *to, const void *from, size_t n) +static void string_memcpy_toio(volatile void __iomem *to, const void *from, size_t n) { if (unlikely(!n)) return; @@ -56,14 +55,64 @@ void memcpy_toio(volatile void __iomem *to, const void *from, size_t n) } rep_movs((void *)to, (const void *) from, n); } + +static void unrolled_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) +{ + const volatile char __iomem *in = from; + char *out = to; + int i; + + for (i = 0; i < n; ++i) + out[i] = readb(&in[i]); +} + +static void unrolled_memcpy_toio(volatile void __iomem *to, const void *from, size_t n) +{ + volatile char __iomem *out = to; + const char *in = from; + int i; + + for (i = 0; i < n; ++i) + writeb(in[i], &out[i]); +} + +static void unrolled_memset_io(volatile void __iomem *a, int b, size_t c) +{ + volatile char __iomem *mem = a; + int i; + + for (i = 0; i < c; ++i) + writeb(b, &mem[i]); +} + +void memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) +{ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) + unrolled_memcpy_fromio(to, from, n); + else + string_memcpy_fromio(to, from, n); +} +EXPORT_SYMBOL(memcpy_fromio); + +void memcpy_toio(volatile void __iomem *to, const void *from, size_t n) +{ + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) + unrolled_memcpy_toio(to, from, n); + else + string_memcpy_toio(to, from, n); +} EXPORT_SYMBOL(memcpy_toio); void memset_io(volatile void __iomem *a, int b, size_t c) { - /* - * TODO: memset can mangle the IO patterns quite a bit.
- * perhaps it would be better to use a dumb one: - */ - memset((void *)a, b, c); + if (cc_platform_has(CC_ATTR_GUEST_UNROLL_STRING_IO)) { + unrolled_memset_io(a, b, c); + } else { + /* + * TODO: memset can mangle the IO patterns quite a bit. + * perhaps it would be better to use a dumb one: + */ + memset((void *)a, b, c); + } } EXPORT_SYMBOL(memset_io); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3481b35cb4ec7..a224193d84bff 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, return ret; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { @@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, return ret; } +#endif + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp) { diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 9f2b251e83c56..7cbb3d00d9e57 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -500,10 +500,24 @@ static int pm_cpu_check(const struct x86_cpu_id *c) return ret; } +static void pm_save_spec_msr(void) +{ + u32 spec_msr_id[] = { + MSR_IA32_SPEC_CTRL, + MSR_IA32_TSX_CTRL, + MSR_TSX_FORCE_ABORT, + MSR_IA32_MCU_OPT_CTRL, + MSR_AMD64_LS_CFG, + }; + + msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); +} + static int pm_check_save_msr(void) { dmi_check_system(msr_save_dmi_table); pm_cpu_check(msr_save_cpu_table); + pm_save_spec_msr(); return 0; } diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 89dd6b1708b04..21ecbe754cb2f 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -506,10 +506,7 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) return ret; } -bool is_xen_pmu(int cpu) -{ - return (get_xenpmu_data() != NULL); -} +bool is_xen_pmu; void xen_pmu_init(int cpu) { @@ -520,7 +517,7 @@ void xen_pmu_init(int cpu) BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE); - if (xen_hvm_domain()) + if (xen_hvm_domain() || (cpu != 0 && !is_xen_pmu)) return; xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL); @@ -541,7 +538,8 @@ void xen_pmu_init(int cpu) per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data; per_cpu(xenpmu_shared, cpu).flags = 0; - if (cpu == 0) { + if (!is_xen_pmu) { + is_xen_pmu = true; perf_register_guest_info_callbacks(&xen_guest_cbs); xen_pmu_arch_init(); } diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h index 0e83a160589bc..65c58894fc79f 100644 --- a/arch/x86/xen/pmu.h +++ b/arch/x86/xen/pmu.h @@ -4,6 +4,8 @@ #include +extern bool is_xen_pmu; + irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); #ifdef CONFIG_XEN_HAVE_VPMU void xen_pmu_init(int cpu); @@ -12,7 +14,6 @@ void xen_pmu_finish(int cpu); static inline void xen_pmu_init(int cpu) {} static inline void xen_pmu_finish(int cpu) {} #endif -bool is_xen_pmu(int cpu); bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err); bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err); int pmu_apic_update(uint32_t reg); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 4a6019238ee7d..688aa8b6ae29a 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -129,7 +129,7 @@ int xen_smp_intr_init_pv(unsigned int cpu) per_cpu(xen_irq_work, cpu).irq = rc; per_cpu(xen_irq_work, cpu).name = callfunc_name; - if (is_xen_pmu(cpu)) { + if 
(is_xen_pmu) { pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu); rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu, xen_pmu_irq_handler, diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index bd5aeb7955675..a63eca1266577 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -411,6 +411,10 @@ extern void update_mmu_cache(struct vm_area_struct * vma, typedef pte_t *pte_addr_t; +void update_mmu_tlb(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep); +#define __HAVE_ARCH_UPDATE_MMU_TLB + #endif /* !defined (__ASSEMBLY__) */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 37d3e9887fe7b..d68987d703e75 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -246,8 +246,8 @@ extern unsigned long __get_wchan(struct task_struct *p); #define xtensa_set_sr(x, sr) \ ({ \ - unsigned int v = (unsigned int)(x); \ - __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: "a"(v)); \ + __asm__ __volatile__ ("wsr %0, "__stringify(sr) :: \ + "a"((unsigned int)(x))); \ }) #define xtensa_get_sr(sr) \ diff --git a/arch/xtensa/kernel/jump_label.c b/arch/xtensa/kernel/jump_label.c index 61cf6497a646b..0dde21e0d3de4 100644 --- a/arch/xtensa/kernel/jump_label.c +++ b/arch/xtensa/kernel/jump_label.c @@ -61,7 +61,7 @@ static void patch_text(unsigned long addr, const void *data, size_t sz) .data = data, }; stop_machine_cpuslocked(patch_text_stop_machine, - &patch, NULL); + &patch, cpu_online_mask); } else { unsigned long flags; diff --git a/arch/xtensa/kernel/mxhead.S b/arch/xtensa/kernel/mxhead.S index 9f38437427264..b702c0908b1f6 100644 --- a/arch/xtensa/kernel/mxhead.S +++ b/arch/xtensa/kernel/mxhead.S @@ -37,11 +37,13 @@ _SetupOCD: * xt-gdb to single step via DEBUG exceptions received directly * by ocd. */ +#if XCHAL_HAVE_WINDOWED movi a1, 1 movi a0, 0 wsr a1, windowstart wsr a0, windowbase rsync +#endif movi a1, LOCKLEVEL wsr a1, ps diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 52c94ab5c2058..1518e261d882e 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -421,3 +421,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index f436cf2efd8b7..27a477dae2322 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -162,6 +162,12 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) } } +void update_mmu_tlb(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + local_flush_tlb_page(vma, address); +} + #ifdef CONFIG_DEBUG_TLB_SANITY static unsigned get_pte_for_vaddr(unsigned vaddr) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 24a5c5329bcd0..809bc612d96b3 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -646,6 +646,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_entity *entity = &bfqq->entity; + /* + * oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group + * until elevator exit. + */ + if (bfqq == &bfqd->oom_bfqq) + return; /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate.
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 36a66e97e3c28..1dff82d34b44b 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2782,6 +2782,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) * are likely to increase the throughput. */ bfqq->new_bfqq = new_bfqq; + /* + * The above assignment schedules the following redirections: + * each time some I/O for bfqq arrives, the process that + * generated that I/O is disassociated from bfqq and + * associated with new_bfqq. Here we increase new_bfqq->ref + * in advance, adding the number of processes that are + * expected to be associated with new_bfqq as they happen to + * issue I/O. + */ new_bfqq->ref += process_refs; return new_bfqq; } @@ -2844,6 +2853,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_queue *in_service_bfqq, *new_bfqq; + /* if a merge has already been set up, then proceed with that first */ + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + /* * Check delayed stable merge for rotational or non-queueing * devs. For this branch to be executed, bfqq must not be @@ -2945,9 +2958,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfq_too_late_for_merging(bfqq)) return NULL; - if (bfqq->new_bfqq) - return bfqq->new_bfqq; - if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) return NULL; @@ -5181,7 +5191,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; struct request *rq; struct bfq_queue *in_serv_queue; - bool waiting_rq, idle_timer_disabled; + bool waiting_rq, idle_timer_disabled = false; spin_lock_irq(&bfqd->lock); @@ -5189,14 +5199,15 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); rq = __bfq_dispatch_request(hctx); - - idle_timer_disabled = - waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + if (in_serv_queue == bfqd->in_service_queue) { + idle_timer_disabled = + waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + } spin_unlock_irq(&bfqd->lock); - - bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue, - idle_timer_disabled); + bfq_update_dispatch_stats(hctx->queue, rq, + idle_timer_disabled ?
in_serv_queue : NULL, + idle_timer_disabled); return rq; } diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index b74cc0da118ec..709b901de3ca9 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -519,7 +519,7 @@ unsigned short bfq_ioprio_to_weight(int ioprio) static unsigned short bfq_weight_to_ioprio(int weight) { return max_t(int, 0, - IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF - weight); + IOPRIO_NR_LEVELS - weight / BFQ_WEIGHT_CONVERSION_COEFF); } static void bfq_get_entity(struct bfq_entity *entity) diff --git a/block/bio.c b/block/bio.c index 4312a8085396b..1be1e360967d0 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1486,8 +1486,7 @@ void bio_endio(struct bio *bio) if (!bio_integrity_endio(bio)) return; - if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED)) - rq_qos_done_bio(bdev_get_queue(bio->bi_bdev), bio); + rq_qos_done_bio(bio); if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 650f7e27989f1..87a1c0c3fa401 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -857,11 +857,11 @@ static void blkcg_fill_root_iostats(void) blk_queue_root_blkg(bdev_get_queue(bdev)); struct blkg_iostat tmp; int cpu; + unsigned long flags; memset(&tmp, 0, sizeof(tmp)); for_each_possible_cpu(cpu) { struct disk_stats *cpu_dkstats; - unsigned long flags; cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu); tmp.ios[BLKG_IOSTAT_READ] += @@ -877,11 +877,11 @@ static void blkcg_fill_root_iostats(void) cpu_dkstats->sectors[STAT_WRITE] << 9; tmp.bytes[BLKG_IOSTAT_DISCARD] += cpu_dkstats->sectors[STAT_DISCARD] << 9; - - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); - blkg_iostat_set(&blkg->iostat.cur, &tmp); - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } + + flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); + blkg_iostat_set(&blkg->iostat.cur, &tmp); + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } } diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 11f49f78db32b..df9cfe4ca5328 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -280,7 +280,6 @@ int set_task_ioprio(struct task_struct *task, int ioprio) task_lock(task); if (task->flags & PF_EXITING) { - err = -ESRCH; kmem_cache_free(iocontext_cachep, ioc); goto out; } @@ -292,7 +291,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) task->io_context->ioprio = ioprio; out: task_unlock(task); - return err; + return 0; } EXPORT_SYMBOL_GPL(set_task_ioprio); diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 6593c7123b97e..24d70e0555ddb 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -598,7 +598,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) int inflight = 0; blkg = bio->bi_blkg; - if (!blkg || !bio_flagged(bio, BIO_TRACKED)) + if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED)) return; iolat = blkg_to_lat(bio->bi_blkg); diff --git a/block/blk-merge.c b/block/blk-merge.c index 4de34a332c9fd..ea6968313b4a8 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -368,8 +369,6 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio, trace_block_split(split, (*bio)->bi_iter.bi_sector); submit_bio_noacct(*bio); *bio = split; - - blk_throtl_charge_bio_split(*bio); } } @@ -600,6 +599,9 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq, static inline int ll_new_hw_segment(struct request *req, struct bio 
*bio, unsigned int nr_phys_segs) { + if (!blk_cgroup_mergeable(req, bio)) + goto no_merge; + if (blk_integrity_merge_bio(req->q, req, bio) == false) goto no_merge; @@ -696,6 +698,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > blk_rq_get_max_segments(req)) return 0; + if (!blk_cgroup_mergeable(req, next->bio)) + return 0; + if (blk_integrity_merge_rq(q, req, next) == false) return 0; @@ -904,6 +909,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (bio_data_dir(bio) != rq_data_dir(rq)) return false; + /* don't merge across cgroup boundaries */ + if (!blk_cgroup_mergeable(rq, bio)) + return false; + /* only merge integrity protected bio into ditto rq */ if (blk_integrity_merge_bio(rq->q, rq, bio) == false) return false; @@ -1089,12 +1098,20 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, if (!plug || rq_list_empty(plug->mq_list)) return false; - /* check the previously added entry for a quick merge attempt */ - rq = rq_list_peek(&plug->mq_list); - if (rq->q == q) { - if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == - BIO_MERGE_OK) - return true; + rq_list_for_each(&plug->mq_list, rq) { + if (rq->q == q) { + if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == + BIO_MERGE_OK) + return true; + break; + } + + /* + * Only keep iterating plug list for merges if we have multiple + * queues + */ + if (!plug->multiple_queues) + break; } return false; } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 55488ba978232..80e0eb26b697c 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -180,11 +180,18 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) { + unsigned long end = jiffies + HZ; int ret; do { ret = __blk_mq_do_dispatch_sched(hctx); - } while (ret == 1); + if (ret != 1) + break; + if (need_resched() || time_is_before_jiffies(end)) { + blk_mq_delay_run_hw_queue(hctx, 0); + break; + } + } while (1); return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 9a9185a0a2d13..cb50097366cd4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2561,13 +2561,36 @@ static void __blk_mq_flush_plug_list(struct request_queue *q, q->mq_ops->queue_rqs(&plug->mq_list); } +static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched) +{ + struct blk_mq_hw_ctx *this_hctx = NULL; + struct blk_mq_ctx *this_ctx = NULL; + struct request *requeue_list = NULL; + unsigned int depth = 0; + LIST_HEAD(list); + + do { + struct request *rq = rq_list_pop(&plug->mq_list); + + if (!this_hctx) { + this_hctx = rq->mq_hctx; + this_ctx = rq->mq_ctx; + } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) { + rq_list_add(&requeue_list, rq); + continue; + } + list_add_tail(&rq->queuelist, &list); + depth++; + } while (!rq_list_empty(plug->mq_list)); + + plug->mq_list = requeue_list; + trace_block_unplug(this_hctx->queue, depth, !from_sched); + blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, from_sched); +} + void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) { - struct blk_mq_hw_ctx *this_hctx; - struct blk_mq_ctx *this_ctx; struct request *rq; - unsigned int depth; - LIST_HEAD(list); if (rq_list_empty(plug->mq_list)) return; @@ -2603,35 +2626,9 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) return; } - this_hctx = NULL; - this_ctx = NULL; - depth = 0; do { - rq = rq_list_pop(&plug->mq_list); - - if (!this_hctx) { - this_hctx = 
rq->mq_hctx; - this_ctx = rq->mq_ctx; - } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) { - trace_block_unplug(this_hctx->queue, depth, - !from_schedule); - blk_mq_sched_insert_requests(this_hctx, this_ctx, - &list, from_schedule); - depth = 0; - this_hctx = rq->mq_hctx; - this_ctx = rq->mq_ctx; - - } - - list_add(&rq->queuelist, &list); - depth++; + blk_mq_dispatch_plug_list(plug, from_schedule); } while (!rq_list_empty(plug->mq_list)); - - if (!list_empty(&list)) { - trace_block_unplug(this_hctx->queue, depth, !from_schedule); - blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, - from_schedule); - } } void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 3cfbc8668cba9..68267007da1c6 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -177,20 +177,20 @@ static inline void rq_qos_requeue(struct request_queue *q, struct request *rq) __rq_qos_requeue(q->rq_qos, rq); } -static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio) +static inline void rq_qos_done_bio(struct bio *bio) { - if (q->rq_qos) - __rq_qos_done_bio(q->rq_qos, bio); + if (bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) || + bio_flagged(bio, BIO_QOS_MERGED))) { + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + if (q->rq_qos) + __rq_qos_done_bio(q->rq_qos, bio); + } } static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) { - /* - * BIO_TRACKED lets controllers know that a bio went through the - * normal rq_qos path. - */ if (q->rq_qos) { - bio_set_flag(bio, BIO_TRACKED); + bio_set_flag(bio, BIO_QOS_THROTTLED); __rq_qos_throttle(q->rq_qos, bio); } } @@ -205,8 +205,10 @@ static inline void rq_qos_track(struct request_queue *q, struct request *rq, static inline void rq_qos_merge(struct request_queue *q, struct request *rq, struct bio *bio) { - if (q->rq_qos) + if (q->rq_qos) { + bio_set_flag(bio, BIO_QOS_MERGED); __rq_qos_merge(q->rq_qos, rq, bio); + } } static inline void rq_qos_queue_depth_changed(struct request_queue *q) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9f32882ceb2f6..7923f49f1046f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -954,9 +954,6 @@ void blk_unregister_queue(struct gendisk *disk) */ if (queue_is_mq(q)) blk_mq_unregister_dev(disk_to_dev(disk), q); - - kobject_uevent(&q->kobj, KOBJ_REMOVE); - kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); mutex_lock(&q->sysfs_lock); @@ -964,6 +961,11 @@ void blk_unregister_queue(struct gendisk *disk) elv_unregister_queue(q); disk_unregister_independent_access_ranges(disk); mutex_unlock(&q->sysfs_lock); + + /* Now that we've deleted all child objects, we can delete the queue.
*/ + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); + mutex_unlock(&q->sysfs_dir_lock); kobject_put(&disk_to_dev(disk)->kobj); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 7c462c006b269..87769b337fc55 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -808,7 +808,8 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; unsigned int bio_size = throtl_bio_data_size(bio); - if (bps_limit == U64_MAX) { + /* no need to throttle if this bio's bytes have been accounted */ + if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) { if (wait) *wait = 0; return true; @@ -920,9 +921,12 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) unsigned int bio_size = throtl_bio_data_size(bio); /* Charge the bio to the group */ - tg->bytes_disp[rw] += bio_size; + if (!bio_flagged(bio, BIO_THROTTLED)) { + tg->bytes_disp[rw] += bio_size; + tg->last_bytes_disp[rw] += bio_size; + } + tg->io_disp[rw]++; - tg->last_bytes_disp[rw] += bio_size; tg->last_io_disp[rw]++; /* diff --git a/block/blk-throttle.h b/block/blk-throttle.h index 175f03abd9e41..cb43f4417d6ea 100644 --- a/block/blk-throttle.h +++ b/block/blk-throttle.h @@ -170,8 +170,6 @@ static inline bool blk_throtl_bio(struct bio *bio) { struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg); - if (bio_flagged(bio, BIO_THROTTLED)) - return false; if (!tg->has_rules[bio_data_dir(bio)]) return false; diff --git a/block/genhd.c b/block/genhd.c index 9eca1f7d35c97..9d9d702d07787 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -330,7 +330,7 @@ int blk_alloc_ext_minor(void) { int idx; - idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL); + idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT - 1, GFP_KERNEL); if (idx == -ENOSPC) return -EBUSY; return idx; @@ -927,12 +927,17 @@ ssize_t part_stat_show(struct device *dev, struct disk_stats stat; unsigned int inflight; - part_stat_read_all(bdev, &stat); if (queue_is_mq(q)) inflight = blk_mq_in_flight(q, bdev); else inflight = part_in_flight(bdev); + if (inflight) { + part_stat_lock(); + update_io_ticks(bdev, jiffies, true); + part_stat_unlock(); + } + part_stat_read_all(bdev, &stat); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -1188,12 +1193,17 @@ static int diskstats_show(struct seq_file *seqf, void *v) xa_for_each(&gp->part_tbl, idx, hd) { if (bdev_is_partition(hd) && !bdev_nr_sectors(hd)) continue; - part_stat_read_all(hd, &stat); if (queue_is_mq(gp->queue)) inflight = blk_mq_in_flight(gp->queue, hd); else inflight = part_in_flight(hd); + if (inflight) { + part_stat_lock(); + update_io_ticks(hd, jiffies, true); + part_stat_unlock(); + } + part_stat_read_all(hd, &stat); seq_printf(seqf, "%4d %7d %pg " "%lu %lu %lu %u " "%lu %lu %lu %u " diff --git a/crypto/Kconfig b/crypto/Kconfig index 442765219c375..2cca54c59fecd 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1847,6 +1847,7 @@ config CRYPTO_JITTERENTROPY config CRYPTO_KDF800108_CTR tristate + select CRYPTO_HMAC select CRYPTO_SHA256 config CRYPTO_USER_API diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 0b4d07aa88111..f94a1d1ad3a6c 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -174,12 +174,6 @@ static int pkcs7_find_key(struct pkcs7_message *pkcs7, pr_devel("Sig %u: Found cert serial match X.509[%u]\n", sinfo->index, certix); - if (strcmp(x509->pub->pkey_algo, 
sinfo->sig->pkey_algo) != 0) { - pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n", - sinfo->index); - continue; - } - sinfo->signer = x509; return 0; } diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 4fefb219bfdc8..7c9e6be35c30c 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -60,39 +60,83 @@ static void public_key_destroy(void *payload0, void *payload3) } /* - * Determine the crypto algorithm name. + * Given a public_key, and an encoding and hash_algo to be used for signing + * and/or verification with that key, determine the name of the corresponding + * akcipher algorithm. Also check that encoding and hash_algo are allowed. */ -static -int software_key_determine_akcipher(const char *encoding, - const char *hash_algo, - const struct public_key *pkey, - char alg_name[CRYPTO_MAX_ALG_NAME]) +static int +software_key_determine_akcipher(const struct public_key *pkey, + const char *encoding, const char *hash_algo, + char alg_name[CRYPTO_MAX_ALG_NAME]) { int n; - if (strcmp(encoding, "pkcs1") == 0) { - /* The data wangled by the RSA algorithm is typically padded - * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447 - * sec 8.2]. + if (!encoding) + return -EINVAL; + + if (strcmp(pkey->pkey_algo, "rsa") == 0) { + /* + * RSA signatures usually use EMSA-PKCS1-1_5 [RFC3447 sec 8.2]. + */ + if (strcmp(encoding, "pkcs1") == 0) { + if (!hash_algo) + n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, + "pkcs1pad(%s)", + pkey->pkey_algo); + else + n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, + "pkcs1pad(%s,%s)", + pkey->pkey_algo, hash_algo); + return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0; + } + if (strcmp(encoding, "raw") != 0) + return -EINVAL; + /* + * Raw RSA cannot differentiate between different hash + * algorithms. + */ + if (hash_algo) + return -EINVAL; + } else if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) { + if (strcmp(encoding, "x962") != 0) + return -EINVAL; + /* + * ECDSA signatures are taken over a raw hash, so they don't + * differentiate between different hash algorithms. That means + * that the verifier should hard-code a specific hash algorithm. + * Unfortunately, in practice ECDSA is used with multiple SHAs, + * so we have to allow all of them and not just one. */ if (!hash_algo) - n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, - "pkcs1pad(%s)", - pkey->pkey_algo); - else - n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, - "pkcs1pad(%s,%s)", - pkey->pkey_algo, hash_algo); - return n >= CRYPTO_MAX_ALG_NAME ? 
-EINVAL : 0; - } - - if (strcmp(encoding, "raw") == 0 || - strcmp(encoding, "x962") == 0) { - strcpy(alg_name, pkey->pkey_algo); - return 0; + return -EINVAL; + if (strcmp(hash_algo, "sha1") != 0 && + strcmp(hash_algo, "sha224") != 0 && + strcmp(hash_algo, "sha256") != 0 && + strcmp(hash_algo, "sha384") != 0 && + strcmp(hash_algo, "sha512") != 0) + return -EINVAL; + } else if (strcmp(pkey->pkey_algo, "sm2") == 0) { + if (strcmp(encoding, "raw") != 0) + return -EINVAL; + if (!hash_algo) + return -EINVAL; + if (strcmp(hash_algo, "sm3") != 0) + return -EINVAL; + } else if (strcmp(pkey->pkey_algo, "ecrdsa") == 0) { + if (strcmp(encoding, "raw") != 0) + return -EINVAL; + if (!hash_algo) + return -EINVAL; + if (strcmp(hash_algo, "streebog256") != 0 && + strcmp(hash_algo, "streebog512") != 0) + return -EINVAL; + } else { + /* Unknown public key algorithm */ + return -ENOPKG; } - - return -ENOPKG; + if (strscpy(alg_name, pkey->pkey_algo, CRYPTO_MAX_ALG_NAME) < 0) + return -EINVAL; + return 0; } static u8 *pkey_pack_u32(u8 *dst, u32 val) @@ -113,9 +157,8 @@ static int software_key_query(const struct kernel_pkey_params *params, u8 *key, *ptr; int ret, len; - ret = software_key_determine_akcipher(params->encoding, - params->hash_algo, - pkey, alg_name); + ret = software_key_determine_akcipher(pkey, params->encoding, + params->hash_algo, alg_name); if (ret < 0) return ret; @@ -179,9 +222,8 @@ static int software_key_eds_op(struct kernel_pkey_params *params, pr_devel("==>%s()\n", __func__); - ret = software_key_determine_akcipher(params->encoding, - params->hash_algo, - pkey, alg_name); + ret = software_key_determine_akcipher(pkey, params->encoding, + params->hash_algo, alg_name); if (ret < 0) return ret; @@ -325,9 +367,23 @@ int public_key_verify_signature(const struct public_key *pkey, BUG_ON(!sig); BUG_ON(!sig->s); - ret = software_key_determine_akcipher(sig->encoding, - sig->hash_algo, - pkey, alg_name); + /* + * If the signature specifies a public key algorithm, it *must* match + * the key's actual public key algorithm. + * + * Small exception: ECDSA signatures don't specify the curve, but ECDSA + * keys do. So the strings can mismatch slightly in that case: + * "ecdsa-nist-*" for the key, but "ecdsa" for the signature. 
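+	 * Any other mismatch is rejected with -EKEYREJECTED below.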
+ */ + if (sig->pkey_algo) { + if (strcmp(pkey->pkey_algo, sig->pkey_algo) != 0 && + (strncmp(pkey->pkey_algo, "ecdsa-", 6) != 0 || + strcmp(sig->pkey_algo, "ecdsa") != 0)) + return -EKEYREJECTED; + } + + ret = software_key_determine_akcipher(pkey, sig->encoding, + sig->hash_algo, alg_name); if (ret < 0) return ret; diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index fe14cae115b51..71cc1738fbfd2 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -128,12 +128,6 @@ int x509_check_for_self_signed(struct x509_certificate *cert) goto out; } - ret = -EKEYREJECTED; - if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0 && - (strncmp(cert->pub->pkey_algo, "ecdsa-", 6) != 0 || - strcmp(cert->sig->pkey_algo, "ecdsa") != 0)) - goto out; - ret = public_key_verify_signature(cert->pub, cert->sig); if (ret < 0) { if (ret == -ENOPKG) { diff --git a/crypto/authenc.c b/crypto/authenc.c index 670bf1a01d00e..17f674a7cdff5 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -253,7 +253,7 @@ static int crypto_authenc_decrypt_tail(struct aead_request *req, dst = scatterwalk_ffwd(areq_ctx->dst, req->dst, req->assoclen); skcipher_request_set_tfm(skreq, ctx->enc); - skcipher_request_set_callback(skreq, aead_request_flags(req), + skcipher_request_set_callback(skreq, flags, req->base.complete, req->base.data); skcipher_request_set_crypt(skreq, src, dst, req->cryptlen - authsize, req->iv); diff --git a/crypto/drbg.c b/crypto/drbg.c index 177983b6ae38b..ea79a31014a40 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -115,7 +115,7 @@ * the SHA256 / AES 256 over other ciphers. Thus, the favored * DRBGs are the latest entries in this array. */ -static const struct drbg_core drbg_cores[] = { +const struct drbg_core drbg_cores[] = { #ifdef CONFIG_CRYPTO_DRBG_CTR { .flags = DRBG_CTR | DRBG_STRENGTH128, @@ -192,6 +192,7 @@ static const struct drbg_core drbg_cores[] = { }, #endif /* CONFIG_CRYPTO_DRBG_HMAC */ }; +EXPORT_SYMBOL(drbg_cores); static int drbg_uninstantiate(struct drbg_state *drbg); @@ -207,7 +208,7 @@ static int drbg_uninstantiate(struct drbg_state *drbg); * Return: normalized strength in *bytes* value or 32 as default * to counter programming errors */ -static inline unsigned short drbg_sec_strength(drbg_flag_t flags) +unsigned short drbg_sec_strength(drbg_flag_t flags) { switch (flags & DRBG_STRENGTH_MASK) { case DRBG_STRENGTH128: @@ -220,6 +221,7 @@ static inline unsigned short drbg_sec_strength(drbg_flag_t flags) return 32; } } +EXPORT_SYMBOL(drbg_sec_strength); /* * FIPS 140-2 continuous self test for the noise source @@ -1252,7 +1254,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, } /* Free all substructures in a DRBG state without the DRBG state structure */ -static inline void drbg_dealloc_state(struct drbg_state *drbg) +void drbg_dealloc_state(struct drbg_state *drbg) { if (!drbg) return; @@ -1273,12 +1275,13 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg) drbg->fips_primed = false; } } +EXPORT_SYMBOL(drbg_dealloc_state); /* * Allocate all sub-structures for a DRBG state. * The DRBG state structure must already be allocated. 
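 * On failure, any sub-structures allocated so far are released again
 * via drbg_dealloc_state() and an error code is returned.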
*/ -static inline int drbg_alloc_state(struct drbg_state *drbg) +int drbg_alloc_state(struct drbg_state *drbg) { int ret = -ENOMEM; unsigned int sb_size = 0; @@ -1359,6 +1362,7 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) drbg_dealloc_state(drbg); return ret; } +EXPORT_SYMBOL(drbg_alloc_state); /************************************************************************* * DRBG interface functions @@ -1895,8 +1899,7 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, * * return: flags */ -static inline void drbg_convert_tfm_core(const char *cra_driver_name, - int *coreref, bool *pr) +void drbg_convert_tfm_core(const char *cra_driver_name, int *coreref, bool *pr) { int i = 0; size_t start = 0; @@ -1923,6 +1926,7 @@ static inline void drbg_convert_tfm_core(const char *cra_driver_name, } } } +EXPORT_SYMBOL(drbg_convert_tfm_core); static int drbg_kcapi_init(struct crypto_tfm *tfm) { diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index 2d115bec15aeb..e7dac734d2377 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -42,8 +42,7 @@ #include #include #include - -#include "jitterentropy.h" +#include /*************************************************************************** * Helper function diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c index 93bff32138238..81b80a4d3d3a0 100644 --- a/crypto/jitterentropy.c +++ b/crypto/jitterentropy.c @@ -133,7 +133,7 @@ struct rand_data { #define JENT_ENTROPY_SAFETY_FACTOR 64 #include -#include "jitterentropy.h" +#include /*************************************************************************** * Adaptive Proportion Test diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 8ac3e73e8ea65..9d804831c8b3f 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -476,6 +476,8 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) pos++; if (digest_info) { + if (digest_info->size > dst_len - pos) + goto done; if (crypto_memneq(out_buf + pos, digest_info->data, digest_info->size)) goto done; @@ -495,7 +497,7 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err) sg_nents_for_len(req->src, req->src_len + req->dst_len), req_ctx->out_buf + ctx->key_size, - req->dst_len, ctx->key_size); + req->dst_len, req->src_len); /* Do the actual verification step. 
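 * The digest copied from the request is compared against the digest
 * recovered from the signature; any difference fails the verification.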
*/ if (memcmp(req_ctx->out_buf + ctx->key_size, out_buf + pos, req->dst_len) != 0) @@ -538,7 +540,7 @@ static int pkcs1pad_verify(struct akcipher_request *req) if (WARN_ON(req->dst) || WARN_ON(!req->dst_len) || - !ctx->key_size || req->src_len < ctx->key_size) + !ctx->key_size || req->src_len != ctx->key_size) return -EINVAL; req_ctx->out_buf = kmalloc(ctx->key_size + req->dst_len, GFP_KERNEL); @@ -621,6 +623,11 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn); + if (strcmp(rsa_alg->base.cra_name, "rsa") != 0) { + err = -EINVAL; + goto err_free_inst; + } + err = -ENAMETOOLONG; hash_name = crypto_attr_alg_name(tb[2]); if (IS_ERR(hash_name)) { diff --git a/crypto/xts.c b/crypto/xts.c index 6c12f30dbdd6d..63c85b9e64e08 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -466,3 +466,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XTS block cipher mode"); MODULE_ALIAS_CRYPTO("xts"); MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_SOFTDEP("pre: ecb"); diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c index 915c2433463d7..e7c30ce06e189 100644 --- a/drivers/acpi/acpica/nswalk.c +++ b/drivers/acpi/acpica/nswalk.c @@ -169,6 +169,9 @@ acpi_ns_walk_namespace(acpi_object_type type, if (start_node == ACPI_ROOT_OBJECT) { start_node = acpi_gbl_root_node; + if (!start_node) { + return_ACPI_STATUS(AE_NO_NAMESPACE); + } } /* Null child means "get first node" */ diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c index 19e50fcbf4d6f..598fd19b65fa4 100644 --- a/drivers/acpi/apei/bert.c +++ b/drivers/acpi/apei/bert.c @@ -29,6 +29,7 @@ #undef pr_fmt #define pr_fmt(fmt) "BERT: " fmt +#define ACPI_BERT_PRINT_MAX_LEN 1024 static int bert_disable; @@ -58,8 +59,11 @@ static void __init bert_print_all(struct acpi_bert_region *region, } pr_info_once("Error records from previous boot:\n"); - - cper_estatus_print(KERN_INFO HW_ERR, estatus); + if (region_len < ACPI_BERT_PRINT_MAX_LEN) + cper_estatus_print(KERN_INFO HW_ERR, estatus); + else + pr_info_once("Max print length exceeded, table data is available at:\n" + "/sys/firmware/acpi/tables/data/BERT"); /* * Because the boot error source is "one-time polled" type, @@ -77,7 +81,7 @@ static int __init setup_bert_disable(char *str) { bert_disable = 1; - return 0; + return 1; } __setup("bert_disable", setup_bert_disable); diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 242f3c2d55330..698d67cee0527 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -891,7 +891,7 @@ EXPORT_SYMBOL_GPL(erst_clear); static int __init setup_erst_disable(char *str) { erst_disable = 1; - return 0; + return 1; } __setup("erst_disable", setup_erst_disable); diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 0edc1ed476737..6aef1ee5e1bdb 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -224,7 +224,7 @@ static int __init hest_ghes_dev_register(unsigned int ghes_count) static int __init setup_hest_disable(char *str) { hest_disable = HEST_DISABLED; - return 0; + return 1; } __setup("hest_disable", setup_hest_disable); diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index ea31ae01458b4..dc208f5f5a1f7 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -59,6 +59,10 @@ MODULE_PARM_DESC(cache_time, "cache time in milliseconds"); static const struct acpi_device_id battery_device_ids[] = { {"PNP0C0A", 0}, + + /* Microsoft Surface Go 3 */ + {"MSHW0146", 0}, + {"", 0}, }; @@ -1148,6 +1152,14 @@ 
static const struct dmi_system_id bat_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad"), }, }, + { + /* Microsoft Surface Go 3 */ + .callback = battery_notification_delay_quirk, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"), + }, + }, {}, }; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 07f604832fd6b..079b952ab59f2 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -332,21 +332,32 @@ static void acpi_bus_osc_negotiate_platform_control(void) if (ACPI_FAILURE(acpi_run_osc(handle, &context))) return; - kfree(context.ret.pointer); + capbuf_ret = context.ret.pointer; + if (context.ret.length <= OSC_SUPPORT_DWORD) { + kfree(context.ret.pointer); + return; + } - /* Now run _OSC again with query flag clear */ + /* + * Now run _OSC again with query flag clear and with the caps + * supported by both the OS and the platform. + */ capbuf[OSC_QUERY_DWORD] = 0; + capbuf[OSC_SUPPORT_DWORD] = capbuf_ret[OSC_SUPPORT_DWORD]; + kfree(context.ret.pointer); if (ACPI_FAILURE(acpi_run_osc(handle, &context))) return; capbuf_ret = context.ret.pointer; - osc_sb_apei_support_acked = - capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT; - osc_pc_lpi_support_confirmed = - capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT; - osc_sb_native_usb4_support_confirmed = - capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT; + if (context.ret.length > OSC_SUPPORT_DWORD) { + osc_sb_apei_support_acked = + capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT; + osc_pc_lpi_support_confirmed = + capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT; + osc_sb_native_usb4_support_confirmed = + capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT; + } kfree(context.ret.pointer); } diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 866560cbb082c..123e98a765de7 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -676,6 +676,11 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) cpc_obj = &out_obj->package.elements[0]; if (cpc_obj->type == ACPI_TYPE_INTEGER) { num_ent = cpc_obj->integer.value; + if (num_ent <= 1) { + pr_debug("Unexpected _CPC NumEntries value (%d) for CPU:%d\n", + num_ent, pr->id); + goto out_free; + } } else { pr_debug("Unexpected entry type(%d) for NumEntries\n", cpc_obj->type); diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index d0986bda29640..3fceb4681ec9f 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -685,7 +685,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, */ if (obj->type == ACPI_TYPE_LOCAL_REFERENCE) { if (index) - return -EINVAL; + return -ENOENT; device = acpi_fetch_acpi_dev(obj->reference.handle); if (!device) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 4f64713e9917b..becc198e4c224 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -415,6 +415,81 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "GA503"), }, }, + /* + * Clevo NL5xRU and NL5xNU/TUXEDO Aura 15 Gen1 and Gen2 have both a + * working native and video interface. However the default detection + * mechanism first registers the video interface before unregistering + * it again and switching to the native interface during boot. 
This + * results in a dangling SBIOS request for backlight change for some + * reason, causing the backlight to switch to ~2% once per boot on the + * first power cord connect or disconnect event. Setting the native + * interface explicitly circumvents this buggy behaviour, by avoiding + * the unregistering process. + */ + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "AURA1501"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xRU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xNU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xNU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + }, + }, + { + .callback = video_detect_force_native, + .ident = "Clevo NL5xNU", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), + }, + }, /* * Desktops which falsely report a backlight and which our heuristics diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index ffdeed5334d6f..664070fc83498 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -284,6 +284,27 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), }, + { + /* Lenovo Yoga Tablet 1050F/L */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp."), + DMI_MATCH(DMI_PRODUCT_NAME, "VALLEYVIEW C0 PLATFORM"), + DMI_MATCH(DMI_BOARD_NAME, "BYT-T FFD8"), + /* Partial match on beginning of BIOS version */ + DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), + }, + { + /* Nextbook Ares 8 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_MATCH(DMI_PRODUCT_NAME, "M890BAP"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), + }, { /* Whitelabel (sold as various brands) TM800A550L */ .matches = { diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f47cab21430f9..752a11d16e262 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -810,7 +810,7 @@ static int __init save_async_options(char *buf) pr_warn("Too long list of driver names for 'driver_async_probe'!\n"); strlcpy(async_probe_drv_names, buf, ASYNC_DRV_NAMES_MAX_LEN); - return 0; + return 1; } __setup("driver_async_probe=", save_async_options); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 365cd4a7f2397..60c38f9cf1a75 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -663,14 +663,16 @@ static int init_memory_block(unsigned long block_id, unsigned long 
state, mem->nr_vmemmap_pages = nr_vmemmap_pages; INIT_LIST_HEAD(&mem->group_next); + ret = register_memory(mem); + if (ret) + return ret; + if (group) { mem->group = group; list_add(&mem->group_next, &group->memory_blocks); } - ret = register_memory(mem); - - return ret; + return 0; } static int add_memory_block(unsigned long base_section_nr) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 5db704f02e712..7e8039d1884cc 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2058,9 +2058,9 @@ static int genpd_remove(struct generic_pm_domain *genpd) kfree(link); } - genpd_debug_remove(genpd); list_del(&genpd->gpd_list_node); genpd_unlock(genpd); + genpd_debug_remove(genpd); cancel_work_sync(&genpd->power_off_work); if (genpd_is_cpu_domain(genpd)) free_cpumask_var(genpd->cpus); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 04ea92cbd9cfd..08c8a69d7b810 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -2018,7 +2018,9 @@ static bool pm_ops_is_empty(const struct dev_pm_ops *ops) void device_pm_check_callbacks(struct device *dev) { - spin_lock_irq(&dev->power.lock); + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); dev->power.no_pm_callbacks = (!dev->bus || (pm_ops_is_empty(dev->bus->pm) && !dev->bus->suspend && !dev->bus->resume)) && @@ -2027,7 +2029,7 @@ void device_pm_check_callbacks(struct device *dev) (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && (!dev->driver || (pm_ops_is_empty(dev->driver->pm) && !dev->driver->suspend && !dev->driver->resume)); - spin_unlock_irq(&dev->power.lock); + spin_unlock_irqrestore(&dev->power.lock, flags); } bool dev_pm_skip_suspend(struct device *dev) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3235532ae0778..8b26f631ebc15 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -180,7 +180,8 @@ void start_new_tl_epoch(struct drbd_connection *connection) void complete_master_bio(struct drbd_device *device, struct bio_and_error *m) { - m->bio->bi_status = errno_to_blk_status(m->error); + if (unlikely(m->error)) + m->bio->bi_status = errno_to_blk_status(m->error); bio_endio(m->bio); dec_ap_bio(device); } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 19fe19eaa50e9..d46a3d5d0c2ec 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -681,33 +681,33 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset); + return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset); } static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf) { - return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); + return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); } static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) { int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR); - return sprintf(buf, "%s\n", autoclear ? "1" : "0"); + return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0"); } static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) { int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN); - return sprintf(buf, "%s\n", partscan ? "1" : "0"); + return sysfs_emit(buf, "%s\n", partscan ? 
"1" : "0"); } static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf) { int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO); - return sprintf(buf, "%s\n", dio ? "1" : "0"); + return sysfs_emit(buf, "%s\n", dio ? "1" : "0"); } LOOP_ATTR_RO(backing_file); @@ -1592,6 +1592,7 @@ struct compat_loop_info { compat_ulong_t lo_inode; /* ioctl r/o */ compat_dev_t lo_rdevice; /* ioctl r/o */ compat_int_t lo_offset; + compat_int_t lo_encrypt_type; /* obsolete, ignored */ compat_int_t lo_encrypt_key_size; /* ioctl w/o */ compat_int_t lo_flags; /* ioctl r/o */ char lo_name[LO_NAME_SIZE]; diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index 4db9a8c244af5..e094d2b8b5a92 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -88,7 +88,7 @@ static void n64cart_submit_bio(struct bio *bio) { struct bio_vec bvec; struct bvec_iter iter; - struct device *dev = bio->bi_disk->private_data; + struct device *dev = bio->bi_bdev->bd_disk->private_data; u32 pos = bio->bi_iter.bi_sector << SECTOR_SHIFT; bio_for_each_segment(bvec, bio, iter) { diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 1a4f8b227eac0..06514ed660229 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2428,10 +2428,15 @@ static int btintel_setup_combined(struct hci_dev *hdev) /* Apply the device specific HCI quirks * - * WBS for SdP - SdP and Stp have a same hw_varaint but - * different fw_variant + * WBS for SdP - For the Legacy ROM products, only SdP + * supports the WBS. But the version information is not + * enough to use here because the StP2 and SdP have same + * hw_variant and fw_variant. So, this flag is set by + * the transport driver (btusb) based on the HW info + * (idProduct) */ - if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22) + if (!btintel_test_flag(hdev, + INTEL_ROM_LEGACY_NO_WBS_SUPPORT)) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index c9b24e9299e2a..e0060e58573c3 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -152,6 +152,7 @@ enum { INTEL_BROKEN_INITIAL_NCMD, INTEL_BROKEN_SHUTDOWN_LED, INTEL_ROM_LEGACY, + INTEL_ROM_LEGACY_NO_WBS_SUPPORT, __INTEL_NUM_FLAGS, }; diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index b5ea8d3bffaa7..9b868f187316d 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -38,21 +38,25 @@ static bool enable_autosuspend; struct btmtksdio_data { const char *fwname; u16 chipid; + bool lp_mbox_supported; }; static const struct btmtksdio_data mt7663_data = { .fwname = FIRMWARE_MT7663, .chipid = 0x7663, + .lp_mbox_supported = false, }; static const struct btmtksdio_data mt7668_data = { .fwname = FIRMWARE_MT7668, .chipid = 0x7668, + .lp_mbox_supported = false, }; static const struct btmtksdio_data mt7921_data = { .fwname = FIRMWARE_MT7961, .chipid = 0x7921, + .lp_mbox_supported = true, }; static const struct sdio_device_id btmtksdio_table[] = { @@ -87,8 +91,17 @@ MODULE_DEVICE_TABLE(sdio, btmtksdio_table); #define RX_DONE_INT BIT(1) #define TX_EMPTY BIT(2) #define TX_FIFO_OVERFLOW BIT(8) +#define FW_MAILBOX_INT BIT(15) +#define INT_MASK GENMASK(15, 0) #define RX_PKT_LEN GENMASK(31, 16) +#define MTK_REG_CSICR 0xc0 +#define CSICR_CLR_MBOX_ACK BIT(0) +#define MTK_REG_PH2DSM0R 0xc4 +#define PH2DSM0R_DRIVER_OWN BIT(0) +#define MTK_REG_PD2HRM0R 0xdc +#define PD2HRM0R_DRV_OWN BIT(0) + #define MTK_REG_CTDR 0x18 #define MTK_REG_CRDR 0x1c @@ -100,6 +113,7 @@ 
MODULE_DEVICE_TABLE(sdio, btmtksdio_table); #define BTMTKSDIO_TX_WAIT_VND_EVT 1 #define BTMTKSDIO_HW_TX_READY 2 #define BTMTKSDIO_FUNC_ENABLED 3 +#define BTMTKSDIO_PATCH_ENABLED 4 struct mtkbtsdio_hdr { __le16 len; @@ -278,6 +292,78 @@ static u32 btmtksdio_drv_own_query(struct btmtksdio_dev *bdev) return sdio_readl(bdev->func, MTK_REG_CHLPCR, NULL); } +static u32 btmtksdio_drv_own_query_79xx(struct btmtksdio_dev *bdev) +{ + return sdio_readl(bdev->func, MTK_REG_PD2HRM0R, NULL); +} + +static int btmtksdio_fw_pmctrl(struct btmtksdio_dev *bdev) +{ + u32 status; + int err; + + sdio_claim_host(bdev->func); + + if (bdev->data->lp_mbox_supported && + test_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state)) { + sdio_writel(bdev->func, CSICR_CLR_MBOX_ACK, MTK_REG_CSICR, + &err); + err = readx_poll_timeout(btmtksdio_drv_own_query_79xx, bdev, + status, !(status & PD2HRM0R_DRV_OWN), + 2000, 1000000); + if (err < 0) { + bt_dev_err(bdev->hdev, "mailbox ACK not cleared"); + goto out; + } + } + + /* Return ownership to the device */ + sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err); + if (err < 0) + goto out; + + err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, + !(status & C_COM_DRV_OWN), 2000, 1000000); + +out: + sdio_release_host(bdev->func); + + if (err < 0) + bt_dev_err(bdev->hdev, "Cannot return ownership to device"); + + return err; +} + +static int btmtksdio_drv_pmctrl(struct btmtksdio_dev *bdev) +{ + u32 status; + int err; + + sdio_claim_host(bdev->func); + + /* Get ownership from the device */ + sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err); + if (err < 0) + goto out; + + err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, + status & C_COM_DRV_OWN, 2000, 1000000); + + if (!err && bdev->data->lp_mbox_supported && + test_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state)) + err = readx_poll_timeout(btmtksdio_drv_own_query_79xx, bdev, + status, status & PD2HRM0R_DRV_OWN, + 2000, 1000000); + +out: + sdio_release_host(bdev->func); + + if (err < 0) + bt_dev_err(bdev->hdev, "Cannot get ownership from device"); + + return err; +} + static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); @@ -480,6 +566,13 @@ static void btmtksdio_txrx_work(struct work_struct *work) * FIFO. 
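 * Only the bits covered by INT_MASK are meaningful here; on the
 * MT7921 a firmware mailbox interrupt is additionally handled by
 * writing PH2DSM0R_DRIVER_OWN back to the device.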
*/ sdio_writel(bdev->func, int_status, MTK_REG_CHISR, NULL); + int_status &= INT_MASK; + + if ((int_status & FW_MAILBOX_INT) && + bdev->data->chipid == 0x7921) { + sdio_writel(bdev->func, PH2DSM0R_DRIVER_OWN, + MTK_REG_PH2DSM0R, 0); + } if (int_status & FW_OWN_BACK_INT) bt_dev_dbg(bdev->hdev, "Get fw own back"); @@ -531,7 +624,7 @@ static void btmtksdio_interrupt(struct sdio_func *func) static int btmtksdio_open(struct hci_dev *hdev) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); - u32 status, val; + u32 val; int err; sdio_claim_host(bdev->func); @@ -542,18 +635,10 @@ static int btmtksdio_open(struct hci_dev *hdev) set_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state); - /* Get ownership from the device */ - sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err); + err = btmtksdio_drv_pmctrl(bdev); if (err < 0) goto err_disable_func; - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - status & C_COM_DRV_OWN, 2000, 1000000); - if (err < 0) { - bt_dev_err(bdev->hdev, "Cannot get ownership from device"); - goto err_disable_func; - } - /* Disable interrupt & mask out all interrupt sources */ sdio_writel(bdev->func, C_INT_EN_CLR, MTK_REG_CHLPCR, &err); if (err < 0) @@ -623,8 +708,6 @@ static int btmtksdio_open(struct hci_dev *hdev) static int btmtksdio_close(struct hci_dev *hdev) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); - u32 status; - int err; sdio_claim_host(bdev->func); @@ -635,13 +718,7 @@ static int btmtksdio_close(struct hci_dev *hdev) cancel_work_sync(&bdev->txrx_work); - /* Return ownership to the device */ - sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, NULL); - - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - !(status & C_COM_DRV_OWN), 2000, 1000000); - if (err < 0) - bt_dev_err(bdev->hdev, "Cannot return ownership to device"); + btmtksdio_fw_pmctrl(bdev); clear_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state); sdio_disable_func(bdev->func); @@ -686,6 +763,7 @@ static int btmtksdio_func_query(struct hci_dev *hdev) static int mt76xx_setup(struct hci_dev *hdev, const char *fwname) { + struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); struct btmtk_hci_wmt_params wmt_params; struct btmtk_tci_sleep tci_sleep; struct sk_buff *skb; @@ -746,6 +824,8 @@ static int mt76xx_setup(struct hci_dev *hdev, const char *fwname) return err; } + set_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state); + ignore_func_on: /* Apply the low power environment setup */ tci_sleep.mode = 0x5; @@ -768,6 +848,7 @@ static int mt76xx_setup(struct hci_dev *hdev, const char *fwname) static int mt79xx_setup(struct hci_dev *hdev, const char *fwname) { + struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); struct btmtk_hci_wmt_params wmt_params; u8 param = 0x1; int err; @@ -793,6 +874,7 @@ static int mt79xx_setup(struct hci_dev *hdev, const char *fwname) hci_set_msft_opcode(hdev, 0xFD30); hci_set_aosp_capable(hdev); + set_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state); return err; } @@ -862,6 +944,15 @@ static int btmtksdio_setup(struct hci_dev *hdev) err = mt79xx_setup(hdev, fwname); if (err < 0) return err; + + err = btmtksdio_fw_pmctrl(bdev); + if (err < 0) + return err; + + err = btmtksdio_drv_pmctrl(bdev); + if (err < 0) + return err; + break; case 0x7663: case 0x7668: @@ -1004,6 +1095,8 @@ static int btmtksdio_probe(struct sdio_func *func, hdev->manufacturer = 70; set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks); + sdio_set_drvdata(func, bdev); + err = hci_register_dev(hdev); if (err < 0) { dev_err(&func->dev, "Can't register HCI device\n"); @@ 
-1011,8 +1104,6 @@ static int btmtksdio_probe(struct sdio_func *func, return err; } - sdio_set_drvdata(func, bdev); - /* pm_runtime_enable would be done after the firmware is being * downloaded because the core layer probably already enables * runtime PM for this func such as the case host->caps & @@ -1058,7 +1149,6 @@ static int btmtksdio_runtime_suspend(struct device *dev) { struct sdio_func *func = dev_to_sdio_func(dev); struct btmtksdio_dev *bdev; - u32 status; int err; bdev = sdio_get_drvdata(func); @@ -1070,19 +1160,10 @@ static int btmtksdio_runtime_suspend(struct device *dev) sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER); - sdio_claim_host(bdev->func); - - sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err); - if (err < 0) - goto out; + err = btmtksdio_fw_pmctrl(bdev); - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - !(status & C_COM_DRV_OWN), 2000, 1000000); -out: bt_dev_info(bdev->hdev, "status (%d) return ownership to device", err); - sdio_release_host(bdev->func); - return err; } @@ -1090,7 +1171,6 @@ static int btmtksdio_runtime_resume(struct device *dev) { struct sdio_func *func = dev_to_sdio_func(dev); struct btmtksdio_dev *bdev; - u32 status; int err; bdev = sdio_get_drvdata(func); @@ -1100,19 +1180,10 @@ static int btmtksdio_runtime_resume(struct device *dev) if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) return 0; - sdio_claim_host(bdev->func); + err = btmtksdio_drv_pmctrl(bdev); - sdio_writel(bdev->func, C_FW_OWN_REQ_CLR, MTK_REG_CHLPCR, &err); - if (err < 0) - goto out; - - err = readx_poll_timeout(btmtksdio_drv_own_query, bdev, status, - status & C_COM_DRV_OWN, 2000, 1000000); -out: bt_dev_info(bdev->hdev, "status (%d) get ownership from device", err); - sdio_release_host(bdev->func); - return err; } diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c30d131da7847..2afbd87d77c9b 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -62,6 +62,7 @@ static struct usb_driver btusb_driver; #define BTUSB_QCA_WCN6855 0x1000000 #define BTUSB_INTEL_BROKEN_SHUTDOWN_LED 0x2000000 #define BTUSB_INTEL_BROKEN_INITIAL_NCMD 0x4000000 +#define BTUSB_INTEL_NO_WBS_SUPPORT 0x8000000 static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -385,9 +386,11 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL_COMBINED | + BTUSB_INTEL_NO_WBS_SUPPORT | BTUSB_INTEL_BROKEN_INITIAL_NCMD | BTUSB_INTEL_BROKEN_SHUTDOWN_LED }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL_COMBINED | + BTUSB_INTEL_NO_WBS_SUPPORT | BTUSB_INTEL_BROKEN_SHUTDOWN_LED }, { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_COMBINED }, { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL_COMBINED | @@ -405,6 +408,8 @@ static const struct usb_device_id blacklist_table[] = { BTUSB_WIDEBAND_SPEECH }, /* Realtek 8852AE Bluetooth devices */ + { USB_DEVICE(0x0bda, 0x2852), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xc852), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x385a), .driver_info = BTUSB_REALTEK | @@ -482,6 +487,8 @@ static const struct usb_device_id blacklist_table[] = { /* Additional Realtek 8761BU Bluetooth devices */ { USB_DEVICE(0x0b05, 0x190e), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x2550, 
0x8761), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, /* Additional Realtek 8821AE Bluetooth devices */ { USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK }, @@ -2041,6 +2048,8 @@ static int btusb_setup_csr(struct hci_dev *hdev) */ set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); + set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks); + set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); /* Clear the reset quirk since this is not an actual * early Bluetooth 1.1 device from CSR. @@ -2051,7 +2060,7 @@ static int btusb_setup_csr(struct hci_dev *hdev) /* * Special workaround for these BT 4.0 chip clones, and potentially more: * - * - 0x0134: a Barrot 8041a02 (HCI rev: 0x1012 sub: 0x0810) + * - 0x0134: a Barrot 8041a02 (HCI rev: 0x0810 sub: 0x1012) * - 0x7558: IC markings FR3191AHAL 749H15143 (HCI rev/sub-version: 0x0709) * * These controllers are really messed-up. @@ -2080,7 +2089,7 @@ static int btusb_setup_csr(struct hci_dev *hdev) if (ret >= 0) msleep(200); else - bt_dev_err(hdev, "CSR: Failed to suspend the device for our Barrot 8041a02 receive-issue workaround"); + bt_dev_warn(hdev, "CSR: Couldn't suspend the device for our Barrot 8041a02 receive-issue workaround"); pm_runtime_forbid(&data->udev->dev); @@ -3737,6 +3746,9 @@ static int btusb_probe(struct usb_interface *intf, hdev->send = btusb_send_frame_intel; hdev->cmd_timeout = btusb_intel_cmd_timeout; + if (id->driver_info & BTUSB_INTEL_NO_WBS_SUPPORT) + btintel_set_flag(hdev, INTEL_ROM_LEGACY_NO_WBS_SUPPORT); + if (id->driver_info & BTUSB_INTEL_BROKEN_INITIAL_NCMD) btintel_set_flag(hdev, INTEL_BROKEN_INITIAL_NCMD); diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index 34286ffe0568f..7ac6908a4dfb4 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -629,9 +629,11 @@ static int h5_enqueue(struct hci_uart *hu, struct sk_buff *skb) break; } - pm_runtime_get_sync(&hu->serdev->dev); - pm_runtime_mark_last_busy(&hu->serdev->dev); - pm_runtime_put_autosuspend(&hu->serdev->dev); + if (hu->serdev) { + pm_runtime_get_sync(&hu->serdev->dev); + pm_runtime_mark_last_busy(&hu->serdev->dev); + pm_runtime_put_autosuspend(&hu->serdev->dev); + } return 0; } diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 3b00d82d36cf7..4cda890ce6470 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -305,6 +305,8 @@ int hci_uart_register_device(struct hci_uart *hu, if (err) return err; + percpu_init_rwsem(&hu->proto_lock); + err = p->open(hu); if (err) goto err_open; @@ -327,7 +329,6 @@ int hci_uart_register_device(struct hci_uart *hu, INIT_WORK(&hu->init_ready, hci_uart_init_work); INIT_WORK(&hu->write_work, hci_uart_write_work); - percpu_init_rwsem(&hu->proto_lock); /* Only when vendor specific setup callback is provided, consider * the manufacturer information valid. 
This avoids filling in the diff --git a/drivers/bus/mhi/core/debugfs.c b/drivers/bus/mhi/core/debugfs.c index 858d7516410bb..d818586c229d2 100644 --- a/drivers/bus/mhi/core/debugfs.c +++ b/drivers/bus/mhi/core/debugfs.c @@ -60,16 +60,16 @@ static int mhi_debugfs_events_show(struct seq_file *m, void *d) } seq_printf(m, "Index: %d intmod count: %lu time: %lu", - i, (er_ctxt->intmod & EV_CTX_INTMODC_MASK) >> + i, (le32_to_cpu(er_ctxt->intmod) & EV_CTX_INTMODC_MASK) >> EV_CTX_INTMODC_SHIFT, - (er_ctxt->intmod & EV_CTX_INTMODT_MASK) >> + (le32_to_cpu(er_ctxt->intmod) & EV_CTX_INTMODT_MASK) >> EV_CTX_INTMODT_SHIFT); - seq_printf(m, " base: 0x%0llx len: 0x%llx", er_ctxt->rbase, - er_ctxt->rlen); + seq_printf(m, " base: 0x%0llx len: 0x%llx", le64_to_cpu(er_ctxt->rbase), + le64_to_cpu(er_ctxt->rlen)); - seq_printf(m, " rp: 0x%llx wp: 0x%llx", er_ctxt->rp, - er_ctxt->wp); + seq_printf(m, " rp: 0x%llx wp: 0x%llx", le64_to_cpu(er_ctxt->rp), + le64_to_cpu(er_ctxt->wp)); seq_printf(m, " local rp: 0x%pK db: 0x%pad\n", ring->rp, &mhi_event->db_cfg.db_val); @@ -106,18 +106,18 @@ static int mhi_debugfs_channels_show(struct seq_file *m, void *d) seq_printf(m, "%s(%u) state: 0x%lx brstmode: 0x%lx pollcfg: 0x%lx", - mhi_chan->name, mhi_chan->chan, (chan_ctxt->chcfg & + mhi_chan->name, mhi_chan->chan, (le32_to_cpu(chan_ctxt->chcfg) & CHAN_CTX_CHSTATE_MASK) >> CHAN_CTX_CHSTATE_SHIFT, - (chan_ctxt->chcfg & CHAN_CTX_BRSTMODE_MASK) >> - CHAN_CTX_BRSTMODE_SHIFT, (chan_ctxt->chcfg & + (le32_to_cpu(chan_ctxt->chcfg) & CHAN_CTX_BRSTMODE_MASK) >> + CHAN_CTX_BRSTMODE_SHIFT, (le32_to_cpu(chan_ctxt->chcfg) & CHAN_CTX_POLLCFG_MASK) >> CHAN_CTX_POLLCFG_SHIFT); - seq_printf(m, " type: 0x%x event ring: %u", chan_ctxt->chtype, - chan_ctxt->erindex); + seq_printf(m, " type: 0x%x event ring: %u", le32_to_cpu(chan_ctxt->chtype), + le32_to_cpu(chan_ctxt->erindex)); seq_printf(m, " base: 0x%llx len: 0x%llx rp: 0x%llx wp: 0x%llx", - chan_ctxt->rbase, chan_ctxt->rlen, chan_ctxt->rp, - chan_ctxt->wp); + le64_to_cpu(chan_ctxt->rbase), le64_to_cpu(chan_ctxt->rlen), + le64_to_cpu(chan_ctxt->rp), le64_to_cpu(chan_ctxt->wp)); seq_printf(m, " local rp: 0x%pK local wp: 0x%pK db: 0x%pad\n", ring->rp, ring->wp, diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index 046f407dc5d6e..d8787aaa176ba 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -77,12 +77,14 @@ static const char * const mhi_pm_state_str[] = { [MHI_PM_STATE_LD_ERR_FATAL_DETECT] = "Linkdown or Error Fatal Detect", }; -const char *to_mhi_pm_state_str(enum mhi_pm_state state) +const char *to_mhi_pm_state_str(u32 state) { - unsigned long pm_state = state; - int index = find_last_bit(&pm_state, 32); + int index; - if (index >= ARRAY_SIZE(mhi_pm_state_str)) + if (state) + index = __fls(state); + + if (!state || index >= ARRAY_SIZE(mhi_pm_state_str)) return "Invalid State"; return mhi_pm_state_str[index]; @@ -291,17 +293,17 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) if (mhi_chan->offload_ch) continue; - tmp = chan_ctxt->chcfg; + tmp = le32_to_cpu(chan_ctxt->chcfg); tmp &= ~CHAN_CTX_CHSTATE_MASK; tmp |= (MHI_CH_STATE_DISABLED << CHAN_CTX_CHSTATE_SHIFT); tmp &= ~CHAN_CTX_BRSTMODE_MASK; tmp |= (mhi_chan->db_cfg.brstmode << CHAN_CTX_BRSTMODE_SHIFT); tmp &= ~CHAN_CTX_POLLCFG_MASK; tmp |= (mhi_chan->db_cfg.pollcfg << CHAN_CTX_POLLCFG_SHIFT); - chan_ctxt->chcfg = tmp; + chan_ctxt->chcfg = cpu_to_le32(tmp); - chan_ctxt->chtype = mhi_chan->type; - chan_ctxt->erindex = mhi_chan->er_index; + chan_ctxt->chtype = 
cpu_to_le32(mhi_chan->type); + chan_ctxt->erindex = cpu_to_le32(mhi_chan->er_index); mhi_chan->ch_state = MHI_CH_STATE_DISABLED; mhi_chan->tre_ring.db_addr = (void __iomem *)&chan_ctxt->wp; @@ -326,14 +328,14 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) if (mhi_event->offload_ev) continue; - tmp = er_ctxt->intmod; + tmp = le32_to_cpu(er_ctxt->intmod); tmp &= ~EV_CTX_INTMODC_MASK; tmp &= ~EV_CTX_INTMODT_MASK; tmp |= (mhi_event->intmod << EV_CTX_INTMODT_SHIFT); - er_ctxt->intmod = tmp; + er_ctxt->intmod = cpu_to_le32(tmp); - er_ctxt->ertype = MHI_ER_TYPE_VALID; - er_ctxt->msivec = mhi_event->irq; + er_ctxt->ertype = cpu_to_le32(MHI_ER_TYPE_VALID); + er_ctxt->msivec = cpu_to_le32(mhi_event->irq); mhi_event->db_cfg.db_mode = true; ring->el_size = sizeof(struct mhi_tre); @@ -347,9 +349,9 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) * ring is empty */ ring->rp = ring->wp = ring->base; - er_ctxt->rbase = ring->iommu_base; + er_ctxt->rbase = cpu_to_le64(ring->iommu_base); er_ctxt->rp = er_ctxt->wp = er_ctxt->rbase; - er_ctxt->rlen = ring->len; + er_ctxt->rlen = cpu_to_le64(ring->len); ring->ctxt_wp = &er_ctxt->wp; } @@ -376,9 +378,9 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) goto error_alloc_cmd; ring->rp = ring->wp = ring->base; - cmd_ctxt->rbase = ring->iommu_base; + cmd_ctxt->rbase = cpu_to_le64(ring->iommu_base); cmd_ctxt->rp = cmd_ctxt->wp = cmd_ctxt->rbase; - cmd_ctxt->rlen = ring->len; + cmd_ctxt->rlen = cpu_to_le64(ring->len); ring->ctxt_wp = &cmd_ctxt->wp; } @@ -579,10 +581,10 @@ void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl, chan_ctxt->rp = 0; chan_ctxt->wp = 0; - tmp = chan_ctxt->chcfg; + tmp = le32_to_cpu(chan_ctxt->chcfg); tmp &= ~CHAN_CTX_CHSTATE_MASK; tmp |= (MHI_CH_STATE_DISABLED << CHAN_CTX_CHSTATE_SHIFT); - chan_ctxt->chcfg = tmp; + chan_ctxt->chcfg = cpu_to_le32(tmp); /* Update to all cores */ smp_wmb(); @@ -616,14 +618,14 @@ int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl, return -ENOMEM; } - tmp = chan_ctxt->chcfg; + tmp = le32_to_cpu(chan_ctxt->chcfg); tmp &= ~CHAN_CTX_CHSTATE_MASK; tmp |= (MHI_CH_STATE_ENABLED << CHAN_CTX_CHSTATE_SHIFT); - chan_ctxt->chcfg = tmp; + chan_ctxt->chcfg = cpu_to_le32(tmp); - chan_ctxt->rbase = tre_ring->iommu_base; + chan_ctxt->rbase = cpu_to_le64(tre_ring->iommu_base); chan_ctxt->rp = chan_ctxt->wp = chan_ctxt->rbase; - chan_ctxt->rlen = tre_ring->len; + chan_ctxt->rlen = cpu_to_le64(tre_ring->len); tre_ring->ctxt_wp = &chan_ctxt->wp; tre_ring->rp = tre_ring->wp = tre_ring->base; diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index e2e10474a9d92..37c39bf1c7a98 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -209,14 +209,14 @@ extern struct bus_type mhi_bus_type; #define EV_CTX_INTMODT_MASK GENMASK(31, 16) #define EV_CTX_INTMODT_SHIFT 16 struct mhi_event_ctxt { - __u32 intmod; - __u32 ertype; - __u32 msivec; - - __u64 rbase __packed __aligned(4); - __u64 rlen __packed __aligned(4); - __u64 rp __packed __aligned(4); - __u64 wp __packed __aligned(4); + __le32 intmod; + __le32 ertype; + __le32 msivec; + + __le64 rbase __packed __aligned(4); + __le64 rlen __packed __aligned(4); + __le64 rp __packed __aligned(4); + __le64 wp __packed __aligned(4); }; #define CHAN_CTX_CHSTATE_MASK GENMASK(7, 0) @@ -227,25 +227,25 @@ struct mhi_event_ctxt { #define CHAN_CTX_POLLCFG_SHIFT 10 #define CHAN_CTX_RESERVED_MASK GENMASK(31, 16) struct mhi_chan_ctxt { - __u32 chcfg; - __u32 chtype; - __u32 erindex; - - __u64 rbase __packed 
__aligned(4); - __u64 rlen __packed __aligned(4); - __u64 rp __packed __aligned(4); - __u64 wp __packed __aligned(4); + __le32 chcfg; + __le32 chtype; + __le32 erindex; + + __le64 rbase __packed __aligned(4); + __le64 rlen __packed __aligned(4); + __le64 rp __packed __aligned(4); + __le64 wp __packed __aligned(4); }; struct mhi_cmd_ctxt { - __u32 reserved0; - __u32 reserved1; - __u32 reserved2; - - __u64 rbase __packed __aligned(4); - __u64 rlen __packed __aligned(4); - __u64 rp __packed __aligned(4); - __u64 wp __packed __aligned(4); + __le32 reserved0; + __le32 reserved1; + __le32 reserved2; + + __le64 rbase __packed __aligned(4); + __le64 rlen __packed __aligned(4); + __le64 rp __packed __aligned(4); + __le64 wp __packed __aligned(4); }; struct mhi_ctxt { @@ -258,8 +258,8 @@ struct mhi_ctxt { }; struct mhi_tre { - u64 ptr; - u32 dword[2]; + __le64 ptr; + __le32 dword[2]; }; struct bhi_vec_entry { @@ -277,57 +277,58 @@ enum mhi_cmd_type { /* No operation command */ #define MHI_TRE_CMD_NOOP_PTR (0) #define MHI_TRE_CMD_NOOP_DWORD0 (0) -#define MHI_TRE_CMD_NOOP_DWORD1 (MHI_CMD_NOP << 16) +#define MHI_TRE_CMD_NOOP_DWORD1 (cpu_to_le32(MHI_CMD_NOP << 16)) /* Channel reset command */ #define MHI_TRE_CMD_RESET_PTR (0) #define MHI_TRE_CMD_RESET_DWORD0 (0) -#define MHI_TRE_CMD_RESET_DWORD1(chid) ((chid << 24) | \ - (MHI_CMD_RESET_CHAN << 16)) +#define MHI_TRE_CMD_RESET_DWORD1(chid) (cpu_to_le32((chid << 24) | \ + (MHI_CMD_RESET_CHAN << 16))) /* Channel stop command */ #define MHI_TRE_CMD_STOP_PTR (0) #define MHI_TRE_CMD_STOP_DWORD0 (0) -#define MHI_TRE_CMD_STOP_DWORD1(chid) ((chid << 24) | \ - (MHI_CMD_STOP_CHAN << 16)) +#define MHI_TRE_CMD_STOP_DWORD1(chid) (cpu_to_le32((chid << 24) | \ + (MHI_CMD_STOP_CHAN << 16))) /* Channel start command */ #define MHI_TRE_CMD_START_PTR (0) #define MHI_TRE_CMD_START_DWORD0 (0) -#define MHI_TRE_CMD_START_DWORD1(chid) ((chid << 24) | \ - (MHI_CMD_START_CHAN << 16)) +#define MHI_TRE_CMD_START_DWORD1(chid) (cpu_to_le32((chid << 24) | \ + (MHI_CMD_START_CHAN << 16))) -#define MHI_TRE_GET_CMD_CHID(tre) (((tre)->dword[1] >> 24) & 0xFF) -#define MHI_TRE_GET_CMD_TYPE(tre) (((tre)->dword[1] >> 16) & 0xFF) +#define MHI_TRE_GET_DWORD(tre, word) (le32_to_cpu((tre)->dword[(word)])) +#define MHI_TRE_GET_CMD_CHID(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 24) & 0xFF) +#define MHI_TRE_GET_CMD_TYPE(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 16) & 0xFF) /* Event descriptor macros */ -#define MHI_TRE_EV_PTR(ptr) (ptr) -#define MHI_TRE_EV_DWORD0(code, len) ((code << 24) | len) -#define MHI_TRE_EV_DWORD1(chid, type) ((chid << 24) | (type << 16)) -#define MHI_TRE_GET_EV_PTR(tre) ((tre)->ptr) -#define MHI_TRE_GET_EV_CODE(tre) (((tre)->dword[0] >> 24) & 0xFF) -#define MHI_TRE_GET_EV_LEN(tre) ((tre)->dword[0] & 0xFFFF) -#define MHI_TRE_GET_EV_CHID(tre) (((tre)->dword[1] >> 24) & 0xFF) -#define MHI_TRE_GET_EV_TYPE(tre) (((tre)->dword[1] >> 16) & 0xFF) -#define MHI_TRE_GET_EV_STATE(tre) (((tre)->dword[0] >> 24) & 0xFF) -#define MHI_TRE_GET_EV_EXECENV(tre) (((tre)->dword[0] >> 24) & 0xFF) -#define MHI_TRE_GET_EV_SEQ(tre) ((tre)->dword[0]) -#define MHI_TRE_GET_EV_TIME(tre) ((tre)->ptr) -#define MHI_TRE_GET_EV_COOKIE(tre) lower_32_bits((tre)->ptr) -#define MHI_TRE_GET_EV_VEID(tre) (((tre)->dword[0] >> 16) & 0xFF) -#define MHI_TRE_GET_EV_LINKSPEED(tre) (((tre)->dword[1] >> 24) & 0xFF) -#define MHI_TRE_GET_EV_LINKWIDTH(tre) ((tre)->dword[0] & 0xFF) +#define MHI_TRE_EV_PTR(ptr) (cpu_to_le64(ptr)) +#define MHI_TRE_EV_DWORD0(code, len) (cpu_to_le32((code << 24) | len)) +#define MHI_TRE_EV_DWORD1(chid, type) 
(cpu_to_le32((chid << 24) | (type << 16))) +#define MHI_TRE_GET_EV_PTR(tre) (le64_to_cpu((tre)->ptr)) +#define MHI_TRE_GET_EV_CODE(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 24) & 0xFF) +#define MHI_TRE_GET_EV_LEN(tre) (MHI_TRE_GET_DWORD(tre, 0) & 0xFFFF) +#define MHI_TRE_GET_EV_CHID(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 24) & 0xFF) +#define MHI_TRE_GET_EV_TYPE(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 16) & 0xFF) +#define MHI_TRE_GET_EV_STATE(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 24) & 0xFF) +#define MHI_TRE_GET_EV_EXECENV(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 24) & 0xFF) +#define MHI_TRE_GET_EV_SEQ(tre) MHI_TRE_GET_DWORD(tre, 0) +#define MHI_TRE_GET_EV_TIME(tre) (MHI_TRE_GET_EV_PTR(tre)) +#define MHI_TRE_GET_EV_COOKIE(tre) lower_32_bits(MHI_TRE_GET_EV_PTR(tre)) +#define MHI_TRE_GET_EV_VEID(tre) ((MHI_TRE_GET_DWORD(tre, 0) >> 16) & 0xFF) +#define MHI_TRE_GET_EV_LINKSPEED(tre) ((MHI_TRE_GET_DWORD(tre, 1) >> 24) & 0xFF) +#define MHI_TRE_GET_EV_LINKWIDTH(tre) (MHI_TRE_GET_DWORD(tre, 0) & 0xFF) /* Transfer descriptor macros */ -#define MHI_TRE_DATA_PTR(ptr) (ptr) -#define MHI_TRE_DATA_DWORD0(len) (len & MHI_MAX_MTU) -#define MHI_TRE_DATA_DWORD1(bei, ieot, ieob, chain) ((2 << 16) | (bei << 10) \ - | (ieot << 9) | (ieob << 8) | chain) +#define MHI_TRE_DATA_PTR(ptr) (cpu_to_le64(ptr)) +#define MHI_TRE_DATA_DWORD0(len) (cpu_to_le32(len & MHI_MAX_MTU)) +#define MHI_TRE_DATA_DWORD1(bei, ieot, ieob, chain) (cpu_to_le32((2 << 16) | (bei << 10) \ + | (ieot << 9) | (ieob << 8) | chain)) /* RSC transfer descriptor macros */ -#define MHI_RSCTRE_DATA_PTR(ptr, len) (((u64)len << 48) | ptr) -#define MHI_RSCTRE_DATA_DWORD0(cookie) (cookie) -#define MHI_RSCTRE_DATA_DWORD1 (MHI_PKT_TYPE_COALESCING << 16) +#define MHI_RSCTRE_DATA_PTR(ptr, len) (cpu_to_le64(((u64)len << 48) | ptr)) +#define MHI_RSCTRE_DATA_DWORD0(cookie) (cpu_to_le32(cookie)) +#define MHI_RSCTRE_DATA_DWORD1 (cpu_to_le32(MHI_PKT_TYPE_COALESCING << 16)) enum mhi_pkt_type { MHI_PKT_TYPE_INVALID = 0x0, @@ -500,7 +501,7 @@ struct state_transition { struct mhi_ring { dma_addr_t dma_handle; dma_addr_t iommu_base; - u64 *ctxt_wp; /* point to ctxt wp */ + __le64 *ctxt_wp; /* point to ctxt wp */ void *pre_aligned; void *base; void *rp; @@ -622,7 +623,7 @@ void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl, enum mhi_pm_state __must_check mhi_tryset_pm_state( struct mhi_controller *mhi_cntrl, enum mhi_pm_state state); -const char *to_mhi_pm_state_str(enum mhi_pm_state state); +const char *to_mhi_pm_state_str(u32 state); int mhi_queue_state_transition(struct mhi_controller *mhi_cntrl, enum dev_st_transition state); void mhi_pm_st_worker(struct work_struct *work); diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index ffde617f93a3b..85f4f7c8d7c60 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -114,7 +114,7 @@ void mhi_ring_er_db(struct mhi_event *mhi_event) struct mhi_ring *ring = &mhi_event->ring; mhi_event->db_cfg.process_db(mhi_event->mhi_cntrl, &mhi_event->db_cfg, - ring->db_addr, *ring->ctxt_wp); + ring->db_addr, le64_to_cpu(*ring->ctxt_wp)); } void mhi_ring_cmd_db(struct mhi_controller *mhi_cntrl, struct mhi_cmd *mhi_cmd) @@ -123,7 +123,7 @@ void mhi_ring_cmd_db(struct mhi_controller *mhi_cntrl, struct mhi_cmd *mhi_cmd) struct mhi_ring *ring = &mhi_cmd->ring; db = ring->iommu_base + (ring->wp - ring->base); - *ring->ctxt_wp = db; + *ring->ctxt_wp = cpu_to_le64(db); mhi_write_db(mhi_cntrl, ring->db_addr, db); } @@ -140,7 +140,7 @@ void mhi_ring_chan_db(struct mhi_controller *mhi_cntrl, * before letting h/w know 
there is new element to fetch. */ dma_wmb(); - *ring->ctxt_wp = db; + *ring->ctxt_wp = cpu_to_le64(db); mhi_chan->db_cfg.process_db(mhi_cntrl, &mhi_chan->db_cfg, ring->db_addr, db); @@ -432,7 +432,7 @@ irqreturn_t mhi_irq_handler(int irq_number, void *dev) struct mhi_event_ctxt *er_ctxt = &mhi_cntrl->mhi_ctxt->er_ctxt[mhi_event->er_index]; struct mhi_ring *ev_ring = &mhi_event->ring; - dma_addr_t ptr = er_ctxt->rp; + dma_addr_t ptr = le64_to_cpu(er_ctxt->rp); void *dev_rp; if (!is_valid_ring_ptr(ev_ring, ptr)) { @@ -537,14 +537,14 @@ static void mhi_recycle_ev_ring_element(struct mhi_controller *mhi_cntrl, /* Update the WP */ ring->wp += ring->el_size; - ctxt_wp = *ring->ctxt_wp + ring->el_size; + ctxt_wp = le64_to_cpu(*ring->ctxt_wp) + ring->el_size; if (ring->wp >= (ring->base + ring->len)) { ring->wp = ring->base; ctxt_wp = ring->iommu_base; } - *ring->ctxt_wp = ctxt_wp; + *ring->ctxt_wp = cpu_to_le64(ctxt_wp); /* Update the RP */ ring->rp += ring->el_size; @@ -801,7 +801,7 @@ int mhi_process_ctrl_ev_ring(struct mhi_controller *mhi_cntrl, struct device *dev = &mhi_cntrl->mhi_dev->dev; u32 chan; int count = 0; - dma_addr_t ptr = er_ctxt->rp; + dma_addr_t ptr = le64_to_cpu(er_ctxt->rp); /* * This is a quick check to avoid unnecessary event processing @@ -940,7 +940,7 @@ int mhi_process_ctrl_ev_ring(struct mhi_controller *mhi_cntrl, mhi_recycle_ev_ring_element(mhi_cntrl, ev_ring); local_rp = ev_ring->rp; - ptr = er_ctxt->rp; + ptr = le64_to_cpu(er_ctxt->rp); if (!is_valid_ring_ptr(ev_ring, ptr)) { dev_err(&mhi_cntrl->mhi_dev->dev, "Event ring rp points outside of the event ring\n"); @@ -970,7 +970,7 @@ int mhi_process_data_event_ring(struct mhi_controller *mhi_cntrl, int count = 0; u32 chan; struct mhi_chan *mhi_chan; - dma_addr_t ptr = er_ctxt->rp; + dma_addr_t ptr = le64_to_cpu(er_ctxt->rp); if (unlikely(MHI_EVENT_ACCESS_INVALID(mhi_cntrl->pm_state))) return -EIO; @@ -1011,7 +1011,7 @@ int mhi_process_data_event_ring(struct mhi_controller *mhi_cntrl, mhi_recycle_ev_ring_element(mhi_cntrl, ev_ring); local_rp = ev_ring->rp; - ptr = er_ctxt->rp; + ptr = le64_to_cpu(er_ctxt->rp); if (!is_valid_ring_ptr(ev_ring, ptr)) { dev_err(&mhi_cntrl->mhi_dev->dev, "Event ring rp points outside of the event ring\n"); @@ -1533,7 +1533,7 @@ static void mhi_mark_stale_events(struct mhi_controller *mhi_cntrl, /* mark all stale events related to channel as STALE event */ spin_lock_irqsave(&mhi_event->lock, flags); - ptr = er_ctxt->rp; + ptr = le64_to_cpu(er_ctxt->rp); if (!is_valid_ring_ptr(ev_ring, ptr)) { dev_err(&mhi_cntrl->mhi_dev->dev, "Event ring rp points outside of the event ring\n"); diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c index 4aae0baea0084..c35c5ddc72207 100644 --- a/drivers/bus/mhi/core/pm.c +++ b/drivers/bus/mhi/core/pm.c @@ -218,7 +218,7 @@ int mhi_ready_state_transition(struct mhi_controller *mhi_cntrl) continue; ring->wp = ring->base + ring->len - ring->el_size; - *ring->ctxt_wp = ring->iommu_base + ring->len - ring->el_size; + *ring->ctxt_wp = cpu_to_le64(ring->iommu_base + ring->len - ring->el_size); /* Update all cores */ smp_wmb(); @@ -420,7 +420,7 @@ static int mhi_pm_mission_mode_transition(struct mhi_controller *mhi_cntrl) continue; ring->wp = ring->base + ring->len - ring->el_size; - *ring->ctxt_wp = ring->iommu_base + ring->len - ring->el_size; + *ring->ctxt_wp = cpu_to_le64(ring->iommu_base + ring->len - ring->el_size); /* Update to all cores */ smp_wmb(); diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 
b79895810c52f..9527b7d638401 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -327,6 +327,7 @@ static const struct mhi_pci_dev_info mhi_quectel_em1xx_info = { .config = &modem_quectel_em1xx_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, .dma_data_width = 32, + .mru_default = 32768, .sideband_wake = true, }; diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c index 626dedd110cbc..fca0d0669aa97 100644 --- a/drivers/bus/mips_cdmm.c +++ b/drivers/bus/mips_cdmm.c @@ -351,6 +351,7 @@ phys_addr_t __weak mips_cdmm_phys_base(void) np = of_find_compatible_node(NULL, NULL, "mti,mips-cdmm"); if (np) { err = of_address_to_resource(np, 0, &res); + of_node_put(np); if (!err) return res.start; } diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 740811893c570..93d52a419470a 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -449,6 +449,9 @@ config RANDOM_TRUST_BOOTLOADER device randomness. Say Y here to assume the entropy provided by the booloader is trustworthy so it will be added to the kernel's entropy pool. Otherwise, say N here so it will be regarded as device input that - only mixes the entropy pool. + only mixes the entropy pool. This can also be configured at boot with + "random.trust_bootloader=on/off". + +source "drivers/char/lrng/Kconfig" endmenu diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 264eb398fdd4f..7371f7464a49f 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -3,7 +3,14 @@ # Makefile for the kernel character device drivers. # -obj-y += mem.o random.o +obj-y += mem.o + +ifeq ($(CONFIG_LRNG),y) + obj-y += lrng/ +else + obj-y += random.o +endif + obj-$(CONFIG_TTY_PRINTK) += ttyprintk.o obj-y += misc.o obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 9704963f9d500..a087156a58186 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -401,7 +401,7 @@ config HW_RANDOM_MESON config HW_RANDOM_CAVIUM tristate "Cavium ThunderX Random Number Generator support" - depends on HW_RANDOM && PCI && ARM64 + depends on HW_RANDOM && PCI && ARCH_THUNDER default HW_RANDOM help This driver provides kernel-side support for the Random Number diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index ecb71c4317a50..8cf0ef501341e 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c @@ -114,6 +114,7 @@ static int atmel_trng_probe(struct platform_device *pdev) err_register: clk_disable_unprepare(trng->clk); + atmel_trng_disable(trng); return ret; } diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c index 6f66919652bf5..7c55f4cf4a8ba 100644 --- a/drivers/char/hw_random/cavium-rng-vf.c +++ b/drivers/char/hw_random/cavium-rng-vf.c @@ -179,7 +179,7 @@ static int cavium_map_pf_regs(struct cavium_rng *rng) pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CAVIUM_RNG_PF, NULL); if (!pdev) { - dev_err(&pdev->dev, "Cannot find RNG PF device\n"); + pr_err("Cannot find RNG PF device\n"); return -EIO; } diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c index 67947a19aa225..e8f9621e79541 100644 --- a/drivers/char/hw_random/nomadik-rng.c +++ b/drivers/char/hw_random/nomadik-rng.c @@ -65,14 +65,14 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id) out_release: amba_release_regions(dev); out_clk: - clk_disable(rng_clk); + 
clk_disable_unprepare(rng_clk); return ret; } static void nmk_rng_remove(struct amba_device *dev) { amba_release_regions(dev); - clk_disable(rng_clk); + clk_disable_unprepare(rng_clk); } static const struct amba_id nmk_rng_ids[] = { diff --git a/drivers/char/lrng/Kconfig b/drivers/char/lrng/Kconfig new file mode 100644 index 0000000000000..658f89f6b35fd --- /dev/null +++ b/drivers/char/lrng/Kconfig @@ -0,0 +1,613 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Linux Random Number Generator configuration +# + +choice + prompt "Random Number Generator Implementation" + default RANDOM_DEFAULT_IMPL + help + Select the random number generator implementation that is + accessible via /dev/random, /dev/urandom, getrandom(2), and + the in-kernel function get_random_bytes. + +config RANDOM_DEFAULT_IMPL + bool "Default Implementation" + help + The default random number generator as provided with + drivers/char/random.c is selected with this option. + +config LRNG + bool "LRNG Implementation with SP800-90A/B/C compliance" + select CRYPTO_LIB_SHA256 if CRYPTO + help + The Linux Random Number Generator (LRNG) generates entropy + from different entropy sources. Each entropy source can + be enabled and configured independently. The interrupt + entropy source can be configured to be SP800-90B compliant. + The entire LRNG can be configured to be SP800-90C compliant. + Runtime-switchable cryptographic support is available. + The LRNG delivers significant entropy during boot. + +endchoice + +menu "Linux Random Number Generator Configuration" + depends on LRNG + +if LRNG + +menu "Specific DRNG seeding strategies" + +config LRNG_OVERSAMPLE_ENTROPY_SOURCES + bool "Oversample entropy sources" + default n + help + When enabling this option, the entropy sources are + over-sampled with the following approach: First, the + entropy sources are requested to provide 64 bits more + entropy than the size of the entropy buffer. For example, + if the entropy buffer is 256 bits, 320 bits of entropy + are requested to fill that buffer. + + Second, the seed operation of the deterministic RNG + requests 128 bits more data from each entropy source than + the security strength of the DRNG during initialization. + A prerequisite for this operation is that the digest size + of the used hash is at least as large as the buffer to be + generated. If the prerequisite is not met, this + oversampling is not applied. + + This strategy is intended to offset the asymptotic entropy + increase to reach full entropy in a buffer. + + The strategy is consistent with the requirements in + NIST SP800-90C and is only enforced with fips=1. + + If unsure, say N. + +config LRNG_OVERSAMPLE_ES_BITS + int + default 0 if !LRNG_OVERSAMPLE_ENTROPY_SOURCES + default 64 if LRNG_OVERSAMPLE_ENTROPY_SOURCES + +config LRNG_SEED_BUFFER_INIT_ADD_BITS + int + default 0 if !LRNG_OVERSAMPLE_ENTROPY_SOURCES + default 128 if LRNG_OVERSAMPLE_ENTROPY_SOURCES + +endmenu # "Specific DRNG seeding strategies" + +menu "Entropy Source Configuration" + +comment "Interrupt Entropy Source" + +config LRNG_IRQ + bool "Enable Interrupt Entropy Source as LRNG Seed Source" + default y + help + The LRNG models an entropy source based on the timing of the + occurrence of interrupts. Enable this option to use this + IRQ entropy source. + + The IRQ entropy source is triggered every time an interrupt + arrives and thus causes the interrupt handler to take + slightly longer to execute.
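As an illustrative aside (not part of this patch), the core idea of such a timing-based entropy source can be sketched in userspace C: per event, only the low bits of a high-resolution timestamp are hard to predict, so those are what get collected. All names below are hypothetical and none are LRNG symbols.

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define POOL_EVENTS 32

static uint8_t pool[POOL_EVENTS];

/* Stand-in for "one interrupt arrived": sample the clock once */
static uint8_t sample_event(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	/* Keep only the low 8 bits of the nanosecond counter */
	return (uint8_t)(ts.tv_nsec & 0xff);
}

int main(void)
{
	for (int i = 0; i < POOL_EVENTS; i++)
		pool[i] = sample_event();
	for (int i = 0; i < POOL_EVENTS; i++)
		printf("%02x", pool[i]);
	putchar('\n');
	return 0;
}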
Disabling the IRQ entropy source implies + that the performance penalty on the interrupt handler added + by the LRNG is eliminated. Yet, this entropy source is + considered to be the internal entropy source of the LRNG. + Thus, only disable it if you have ensured that other entropy + sources are available that supply the LRNG with entropy. + + If you disable the IRQ entropy source, you MUST ensure that + one or more entropy sources collectively have the + capability to deliver sufficient entropy with one invocation + at a rate compliant with the security strength of the DRNG + (usually 256 bits of entropy). In addition, if those + entropy sources do not deliver sufficient entropy during the + first request, the reseed must be triggered from user + space or kernel space when sufficient entropy is considered + to be present. + + If unsure, say Y. + +choice + prompt "Continuous entropy compression boot time setting" + default LRNG_CONTINUOUS_COMPRESSION_ENABLED + depends on LRNG_IRQ + help + Select the default behavior of the interrupt entropy source + continuous compression operation. + + The Linux RNG collects entropy data during each interrupt. + For performance reasons, an amount of entropy data defined by + the LRNG entropy collection pool size is concatenated into + an array. When that array is filled up, a hash is calculated + to compress the entropy. That hash is calculated in + interrupt context. + + In case such hash calculation in interrupt context is deemed + too time-consuming, the continuous compression operation + can be disabled. If disabled, the collection of entropy will + not trigger a hash compression operation in interrupt context. + The compression happens only when the DRNG is reseeded, which is + in process context. This implies that old entropy data + collected after the last DRNG-reseed is overwritten with newer + entropy data once the collection pool is full instead of + retaining its entropy with the compression operation. + + config LRNG_CONTINUOUS_COMPRESSION_ENABLED + bool "Enable continuous compression (default)" + + config LRNG_CONTINUOUS_COMPRESSION_DISABLED + bool "Disable continuous compression" +endchoice + +config LRNG_ENABLE_CONTINUOUS_COMPRESSION + bool + default y if LRNG_CONTINUOUS_COMPRESSION_ENABLED + default n if LRNG_CONTINUOUS_COMPRESSION_DISABLED + +config LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION + bool "Runtime-switchable continuous entropy compression" + depends on LRNG_IRQ + help + By default, the interrupt entropy source continuous + compression operation behavior is hard-wired into the kernel. + Enable this option to allow it to be configurable at boot time. + + To modify the default behavior of the continuous + compression operation, use the kernel command line option + lrng_sw_noise.lrng_pcpu_continuous_compression. + + If unsure, say N. + +choice + prompt "LRNG Entropy Collection Pool Size" + default LRNG_COLLECTION_SIZE_1024 + depends on LRNG_IRQ + help + Select the size of the LRNG entropy collection pool + storing data for the interrupt entropy source without + performing a compression operation. The larger the + collection size is, the faster the average interrupt + handling will be. The collection size represents the + number of bytes of the per-CPU memory used to batch + up entropy event data. + + The default value is good for regular operations. Choose + larger sizes for servers that have no memory limitations. + If runtime memory is precious, choose a smaller size.
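As an illustrative aside (not part of this patch), the batch-then-compress pattern that this pool size controls can be sketched as follows. compress() is a mere placeholder for the hash the LRNG runs once the per-CPU array is full; all names are hypothetical.

#include <stdint.h>
#include <string.h>

#define COLLECTION_SIZE 1024	/* mirrors CONFIG_LRNG_COLLECTION_SIZE */

struct collection_pool {
	uint32_t events[COLLECTION_SIZE];	/* batched entropy event data */
	unsigned int fill;
};

/* Placeholder for the hash compression over the batched event data */
static void compress(struct collection_pool *p)
{
	memset(p->events, 0, sizeof(p->events));
	p->fill = 0;
}

/* Called per entropy event; a larger pool means less frequent hashing */
static void collect_event(struct collection_pool *p, uint32_t ev)
{
	p->events[p->fill++] = ev;
	if (p->fill == COLLECTION_SIZE)
		compress(p);
}

int main(void)
{
	static struct collection_pool pool;

	for (uint32_t ev = 0; ev < 3000; ev++)
		collect_event(&pool, ev);	/* compresses after events 1024 and 2048 */
	return 0;
}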
+ + The collection size is unrelated to the entropy rate + or the amount of entropy the LRNG can process. + + config LRNG_COLLECTION_SIZE_32 + depends on LRNG_CONTINUOUS_COMPRESSION_ENABLED + depends on !LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION + depends on !LRNG_OVERSAMPLE_ENTROPY_SOURCES + bool "32 interrupt events" + + config LRNG_COLLECTION_SIZE_256 + depends on !LRNG_OVERSAMPLE_ENTROPY_SOURCES + bool "256 interrupt events" + + config LRNG_COLLECTION_SIZE_512 + bool "512 interrupt events" + + config LRNG_COLLECTION_SIZE_1024 + bool "1024 interrupt events (default)" + + config LRNG_COLLECTION_SIZE_2048 + bool "2048 interrupt events" + + config LRNG_COLLECTION_SIZE_4096 + bool "4096 interrupt events" + + config LRNG_COLLECTION_SIZE_8192 + bool "8192 interrupt events" + +endchoice + +config LRNG_COLLECTION_SIZE + int + default 32 if LRNG_COLLECTION_SIZE_32 + default 256 if LRNG_COLLECTION_SIZE_256 + default 512 if LRNG_COLLECTION_SIZE_512 + default 1024 if LRNG_COLLECTION_SIZE_1024 + default 2048 if LRNG_COLLECTION_SIZE_2048 + default 4096 if LRNG_COLLECTION_SIZE_4096 + default 8192 if LRNG_COLLECTION_SIZE_8192 + +config LRNG_HEALTH_TESTS + bool "Enable interrupt entropy source online health tests" + depends on LRNG_IRQ + help + The online health tests applied to the interrupt entropy + source validate the noise source at runtime for fatal + errors. These tests include SP800-90B compliant tests + which are invoked if the system is booted with fips=1. + In case of fatal errors during active SP800-90B tests, + the issue is logged and the noise data is discarded. + These tests are required for full compliance of the + interrupt entropy source with SP800-90B. + + If unsure, say Y. + +config LRNG_RCT_BROKEN + bool "SP800-90B RCT with dangerously low cutoff value" + depends on LRNG_HEALTH_TESTS + depends on BROKEN + default n + help + This option enables a dangerously low SP800-90B repetitive + count test (RCT) cutoff value which makes it very likely + that the RCT is triggered to raise a self test failure. + + This option is ONLY intended for developers wanting to + test the effectiveness of the SP800-90B RCT health test. + + If unsure, say N. + +config LRNG_APT_BROKEN + bool "SP800-90B APT with dangerously low cutoff value" + depends on LRNG_HEALTH_TESTS + depends on BROKEN + default n + help + This option enables a dangerously low SP800-90B adaptive + proportion test (APT) cutoff value which makes it very + likely that the APT is triggered to raise a self test + failure. + + This option is ONLY intended for developers wanting to + test the effectiveness of the SP800-90B APT health test. + + If unsure, say N. + +# Default taken from SP800-90B sec 4.4.1 - significance level 2^-30 +config LRNG_RCT_CUTOFF + int + default 31 if !LRNG_RCT_BROKEN + default 1 if LRNG_RCT_BROKEN + +# Default taken from SP800-90B sec 4.4.2 - significance level 2^-30 +config LRNG_APT_CUTOFF + int + default 325 if !LRNG_APT_BROKEN + default 32 if LRNG_APT_BROKEN + +config LRNG_IRQ_ENTROPY_RATE + int "Interrupt Entropy Source Entropy Rate" + depends on LRNG_IRQ + range 256 4294967295 + default 256 + help + The LRNG will collect the configured number of interrupts to + obtain 256 bits of entropy. This value can be set to any value + between 256 and 4294967295. The LRNG guarantees that this value + is not lower than 256. This lower limit implies that one interrupt + event is credited with one bit of entropy. This value is subject + to increase by the oversampling factor if no high-resolution timer + is found.
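As an illustrative aside (not part of this patch), the entropy-rate arithmetic described above can be spelled out in a few lines; credited_bits() is a hypothetical helper, not an LRNG function.

#include <stdint.h>
#include <stdio.h>

/* rate interrupts yield 256 bits of credit, so events yield events * 256 / rate */
static uint32_t credited_bits(uint64_t events, uint32_t irq_entropy_rate)
{
	return (uint32_t)(events * 256 / irq_entropy_rate);
}

int main(void)
{
	/* Default rate 256: one bit of credit per interrupt event */
	printf("%u\n", credited_bits(256, 256));		/* prints 256 */
	/* Rate 4294967295: the credit rounds down to zero */
	printf("%u\n", credited_bits(1024, 4294967295u));	/* prints 0 */
	return 0;
}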
+ + In order to effectively disable the interrupt entropy source, + the option has to be set to 4294967295. In this case, the + interrupt entropy source will still deliver data but without + being credited with entropy. + +comment "Jitter RNG Entropy Source" + +config LRNG_JENT + bool "Enable Jitter RNG as LRNG Seed Source" + depends on CRYPTO + select CRYPTO_JITTERENTROPY + help + The Linux RNG may use the Jitter RNG as a noise source. + This option enables its use. Its default + entropy level is 16 bits of entropy per 256 data bits delivered + by the Jitter RNG. This entropy level can be changed at boot + time or at runtime with the lrng_base.jitterrng configuration + variable. + +config LRNG_JENT_ENTROPY_RATE + int "Jitter RNG Entropy Source Entropy Rate" + depends on LRNG_JENT + range 0 256 + default 16 + help + The option defines the amount of entropy the LRNG applies to 256 + bits of data obtained from the Jitter RNG entropy source. The + LRNG enforces the limit that this value must be in the range + between 0 and 256. + + When configuring this value to 0, the Jitter RNG entropy source + will provide 256 bits of data without being credited to contain + entropy. + +comment "CPU Entropy Source" + +config LRNG_CPU + bool "Enable CPU Entropy Source as LRNG Seed Source" + default y + help + Current CPUs commonly contain entropy sources which can be + used to seed the LRNG. For example, the Intel RDSEED + instruction or the POWER DARN instruction will be used + to seed the LRNG if this option is enabled. + + Note, if this option is enabled and the underlying CPU + does not offer such an entropy source, the LRNG will + automatically detect this and ignore the hardware. + +config LRNG_CPU_FULL_ENT_MULTIPLIER + int + default 1 if !LRNG_TEST_CPU_ES_COMPRESSION + default 123 if LRNG_TEST_CPU_ES_COMPRESSION + +config LRNG_CPU_ENTROPY_RATE + int "CPU Entropy Source Entropy Rate" + depends on LRNG_CPU + range 0 256 + default 8 + help + The option defines the amount of entropy the LRNG applies to 256 + bits of data obtained from the CPU entropy source. The LRNG + enforces the limit that this value must be in the range between + 0 and 256. + + When configuring this value to 0, the CPU entropy source will + provide 256 bits of data without being credited to contain + entropy. + + Note, this option is overridden when the option + CONFIG_RANDOM_TRUST_CPU is set. + +endmenu # "Entropy Source Configuration" + +menuconfig LRNG_DRNG_SWITCH + bool "Support DRNG runtime switching" + help + The Linux RNG uses a ChaCha20 DRNG by default, which is + accessible via the external interfaces. With this configuration + option, other DRNGs can be selected and loaded at runtime. + +if LRNG_DRNG_SWITCH + +config LRNG_KCAPI_HASH + bool + select CRYPTO_HASH + +config LRNG_DRBG + tristate "SP800-90A support for the LRNG" + depends on CRYPTO + select CRYPTO_DRBG_MENU + select CRYPTO_SHA512 + select LRNG_KCAPI_HASH + help + Enable the SP800-90A DRBG support for the LRNG. Once the + module is loaded, output from /dev/random, /dev/urandom, + getrandom(2), or get_random_bytes_full is provided by a DRBG. + +config LRNG_KCAPI + tristate "Kernel Crypto API support for the LRNG" + depends on CRYPTO + depends on !LRNG_DRBG + select CRYPTO_RNG + select LRNG_KCAPI_HASH + help + Enable the support for generic pseudo-random number + generators offered by the kernel crypto API with the + LRNG.
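As an illustrative aside (not part of this patch), the following is roughly the kernel crypto API RNG usage that such a backend wraps. It is a sketch with error handling trimmed, and "drbg_nopr_hmac_sha512" is used merely as an example algorithm name.

#include <crypto/rng.h>
#include <linux/err.h>

static int kcapi_rng_demo(u8 *buf, unsigned int len)
{
	struct crypto_rng *rng;
	u8 seed[48] = { };	/* would come from the LRNG entropy sources */
	int ret;

	rng = crypto_alloc_rng("drbg_nopr_hmac_sha512", 0, 0);
	if (IS_ERR(rng))
		return PTR_ERR(rng);

	ret = crypto_rng_reset(rng, seed, sizeof(seed));	/* seed the RNG */
	if (!ret)
		ret = crypto_rng_get_bytes(rng, buf, len);	/* generate */

	crypto_free_rng(rng);
	return ret;
}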
Once the module is loaded, output from /dev/random, + /dev/urandom, getrandom(2), or get_random_bytes is + provided by the selected kernel crypto API RNG. +endif # LRNG_DRNG_SWITCH + +menuconfig LRNG_TESTING_MENU + bool "LRNG testing interfaces" + depends on DEBUG_FS + help + Enable one or more of the following test interfaces. + + If unsure, say N. + +if LRNG_TESTING_MENU + +config LRNG_RAW_HIRES_ENTROPY + bool "Enable entropy test interface to hires timer noise source" + default y + help + The test interface allows a privileged process to capture + the raw unconditioned high resolution time stamp noise that + is collected by the LRNG for statistical analysis. Extracted + noise data is not used to seed the LRNG. + + The raw noise data can be obtained using the lrng_raw_hires + debugfs file. Using the option lrng_testing.boot_raw_hires_test=1, + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_JIFFIES_ENTROPY + bool "Enable entropy test interface to Jiffies noise source" + help + The test interface allows a privileged process to capture + the raw unconditioned Jiffies values that are collected by + the LRNG for statistical analysis. This data is used for + seeding the LRNG if a high-resolution time stamp is not + available. If a high-resolution time stamp is detected, + the Jiffies value is not collected by the LRNG and no + data is provided via the test interface. Extracted noise + data is not used to seed the random number generator. + + The raw noise data can be obtained using the lrng_raw_jiffies + debugfs file. Using the option lrng_testing.boot_raw_jiffies_test=1, + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_IRQ_ENTROPY + bool "Enable entropy test interface to IRQ number noise source" + help + The test interface allows a privileged process to capture + the raw unconditioned interrupt number that is collected by + the LRNG for statistical analysis. This data is used for + seeding the random32 PRNG external to the LRNG if a + high-resolution time stamp is available; otherwise, it is used + to seed the LRNG. Extracted noise data is not used to + seed the random number generator. + + The raw noise data can be obtained using the lrng_raw_irq + debugfs file. Using the option lrng_testing.boot_raw_irq_test=1, + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_IRQFLAGS_ENTROPY + bool "Enable entropy test interface to IRQ flags noise source" + help + The test interface allows a privileged process to capture + the raw unconditioned interrupt flags that are collected by + the LRNG for statistical analysis. This data is used for + seeding the random32 PRNG external to the LRNG if a + high-resolution time stamp is available; otherwise, it is used + to seed the LRNG. Extracted noise data is not used to + seed the random number generator. + + The raw noise data can be obtained using the lrng_raw_irqflags + debugfs file. Using the option lrng_testing.boot_raw_irqflags_test=1, + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_RETIP_ENTROPY + bool "Enable entropy test interface to RETIP value noise source" + help + The test interface allows a privileged process to capture + the raw unconditioned return instruction pointer value + that is collected by the LRNG for statistical analysis.
+ This data is used for seeding the random32 PRNG external + to the LRNG if a high-resolution time stamp is available; + otherwise, it is used to seed the LRNG. Extracted noise + data is not used to seed the random number generator. + + The raw noise data can be obtained using the lrng_raw_retip + debugfs file. Using the option lrng_testing.boot_raw_retip_test=1, + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_REGS_ENTROPY + bool "Enable entropy test interface to IRQ register value noise source" + help + The test interface allows a privileged process to capture + the raw unconditioned interrupt register value that is + collected by the LRNG for statistical analysis. Extracted noise + data is not used to seed the random number generator. + + The raw noise data can be obtained using the lrng_raw_regs + debugfs file. Using the option lrng_testing.boot_raw_regs_test=1, + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_ARRAY + bool "Enable test interface to LRNG raw entropy storage array" + help + The test interface allows a privileged process to capture + the raw noise data that is collected by the LRNG + in the per-CPU array for statistical analysis. The purpose + of this interface is to verify that the array handling code + truly only concatenates data and provides the same entropy + rate as the raw unconditioned noise source when assessing + the collected data byte-wise. + + The data can be obtained using the lrng_raw_array debugfs + file. Using the option lrng_testing.boot_raw_array=1, + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_IRQ_PERF + bool "Enable LRNG interrupt performance monitor" + help + With this option, the performance monitor of the LRNG + interrupt handling code is enabled. The interface provides + the execution time of the interrupt handler in + cycles. + + The interrupt performance data can be obtained using + the lrng_irq_perf debugfs file. Using the option + lrng_testing.boot_irq_perf=1, the performance data of + the first 1000 entropy events since boot can be sampled. + +config LRNG_ACVT_HASH + bool "Enable LRNG ACVT Hash interface" + help + With this option, the LRNG built-in hash function used for + auxiliary pool management and prior to switching the + cryptographic backends is made available for ACVT. Data + to be hashed is written into the interface. The read + operation triggers the hash operation to generate the + message digest. + + The ACVT interface is available with the lrng_acvt_hash + debugfs file. + +config LRNG_RUNTIME_ES_CONFIG + bool "Enable runtime configuration of entropy sources" + help + When enabling this option, the LRNG provides a mechanism + to alter the entropy rate of each entropy source + during boot time and runtime. + + The following interfaces are available: + lrng_archrandom.archrandom for the CPU entropy source, + lrng_jent.jitterrng for the Jitter RNG entropy source, and + lrng_sw_noise.irq_entropy for the interrupt entropy source. + +config LRNG_RUNTIME_MAX_WO_RESEED_CONFIG + bool "Enable runtime configuration of max reseed threshold" + help + When enabling this option, the LRNG provides an interface + for setting the maximum number of DRNG generate + operations without a full-entropy reseed. The + interface is lrng_drng.max_wo_reseed.
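As an illustrative aside (not part of this patch), the bookkeeping behind this threshold reduces to a counter of generate operations that pushes the DRNG out of the fully seeded state once exceeded. The names below are hypothetical; in the patch itself the counter appears as requests_since_fully_seeded in lrng_drng.c further down.

#include <stdbool.h>
#include <stdint.h>

struct drng_state {
	uint32_t since_fully_seeded;	/* generate ops since a full reseed */
	bool fully_seeded;
};

/* Called on every DRNG generate operation */
static void on_generate(struct drng_state *d, uint32_t max_wo_reseed)
{
	if (++d->since_fully_seeded > max_wo_reseed)
		d->fully_seeded = false;	/* demand a reseed with full entropy */
}

/* Called when a reseed with full entropy succeeded */
static void on_full_reseed(struct drng_state *d)
{
	d->since_fully_seeded = 0;
	d->fully_seeded = true;
}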
+ +config LRNG_TEST_CPU_ES_COMPRESSION + bool "Force CPU ES compression operation" + help + When enabling this option, the CPU ES compression operation + is forced by setting an arbitrary value > 1 for the data + multiplier even when the CPU ES would deliver full entropy. + This allows testing of the compression operation. It + therefore forces to pull more data from the CPU ES + than what may be required. + +config LRNG_TESTING + bool + default y if (LRNG_RAW_HIRES_ENTROPY || LRNG_RAW_JIFFIES_ENTROPY ||LRNG_RAW_IRQ_ENTROPY || LRNG_RAW_IRQFLAGS_ENTROPY || LRNG_RAW_RETIP_ENTROPY || LRNG_RAW_REGS_ENTROPY || LRNG_RAW_ARRAY || LRNG_IRQ_PERF || LRNG_ACVT_HASH) + +endif #LRNG_TESTING_MENU + +config LRNG_SELFTEST + bool "Enable power-on and on-demand self-tests" + help + The power-on self-tests are executed during boot time + covering the ChaCha20 DRNG, the hash operation used for + processing the entropy pools and the auxiliary pool, and + the time stamp management of the LRNG. + + The on-demand self-tests are triggered by writing any + value into the SysFS file selftest_status. At the same + time, when reading this file, the test status is + returned. A zero indicates that all tests were executed + successfully. + + If unsure, say Y. + +if LRNG_SELFTEST + +config LRNG_SELFTEST_PANIC + bool "Panic the kernel upon self-test failure" + help + If the option is enabled, the kernel is terminated if an + LRNG power-on self-test failure is detected. + +endif # LRNG_SELFTEST + +endif # LRNG + +endmenu # LRNG diff --git a/drivers/char/lrng/Makefile b/drivers/char/lrng/Makefile new file mode 100644 index 0000000000000..e4f7f9702eb41 --- /dev/null +++ b/drivers/char/lrng/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Linux Random Number Generator. +# + +obj-y += lrng_es_mgr.o lrng_aux.o \ + lrng_drng.o lrng_chacha20.o \ + lrng_interfaces.o lrng_es_aux.o + +obj-$(CONFIG_LRNG_IRQ) += lrng_es_irq.o +obj-$(CONFIG_SYSCTL) += lrng_proc.o +obj-$(CONFIG_NUMA) += lrng_numa.o +obj-$(CONFIG_LRNG_CPU) += lrng_es_archrandom.o +obj-$(CONFIG_LRNG_DRNG_SWITCH) += lrng_switch.o +obj-$(CONFIG_LRNG_KCAPI_HASH) += lrng_kcapi_hash.o +obj-$(CONFIG_LRNG_DRBG) += lrng_drbg.o +obj-$(CONFIG_LRNG_KCAPI) += lrng_kcapi.o +obj-$(CONFIG_LRNG_JENT) += lrng_es_jent.o +obj-$(CONFIG_LRNG_HEALTH_TESTS) += lrng_health.o +obj-$(CONFIG_LRNG_TESTING) += lrng_testing.o +obj-$(CONFIG_LRNG_SELFTEST) += lrng_selftest.o diff --git a/drivers/char/lrng/lrng_aux.c b/drivers/char/lrng/lrng_aux.c new file mode 100644 index 0000000000000..e3b994f6e4c14 --- /dev/null +++ b/drivers/char/lrng/lrng_aux.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG auxiliary interfaces + * + * Copyright (C) 2019 - 2021 Stephan Mueller + * Copyright (C) 2017 Jason A. Donenfeld . All + * Rights Reserved. + * Copyright (C) 2016 Jason Cooper + */ + +#include +#include + +#include "lrng_internal.h" + +struct batched_entropy { + union { + u64 entropy_u64[LRNG_DRNG_BLOCKSIZE / sizeof(u64)]; + u32 entropy_u32[LRNG_DRNG_BLOCKSIZE / sizeof(u32)]; + }; + unsigned int position; + spinlock_t batch_lock; +}; + +/* + * Get a random word for internal kernel use only. The quality of the random + * number is as good as /dev/urandom, but there is no backtrack protection, + * with the goal of being quite fast and not depleting entropy. 
+ */ +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), +}; + +u64 get_random_u64(void) +{ + u64 ret; + unsigned long flags; + struct batched_entropy *batch; + + lrng_debug_report_seedlevel("get_random_u64"); + + batch = raw_cpu_ptr(&batched_entropy_u64); + spin_lock_irqsave(&batch->batch_lock, flags); + if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { + lrng_drng_get_atomic((u8 *)batch->entropy_u64, + LRNG_DRNG_BLOCKSIZE); + batch->position = 0; + } + ret = batch->entropy_u64[batch->position++]; + spin_unlock_irqrestore(&batch->batch_lock, flags); + return ret; +} +EXPORT_SYMBOL(get_random_u64); + +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), +}; + +u32 get_random_u32(void) +{ + u32 ret; + unsigned long flags; + struct batched_entropy *batch; + + lrng_debug_report_seedlevel("get_random_u32"); + + batch = raw_cpu_ptr(&batched_entropy_u32); + spin_lock_irqsave(&batch->batch_lock, flags); + if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { + lrng_drng_get_atomic((u8 *)batch->entropy_u32, + LRNG_DRNG_BLOCKSIZE); + batch->position = 0; + } + ret = batch->entropy_u32[batch->position++]; + spin_unlock_irqrestore(&batch->batch_lock, flags); + return ret; +} +EXPORT_SYMBOL(get_random_u32); + +/* + * It's important to invalidate all potential batched entropy that might + * be stored before the crng is initialized, which we can do lazily by + * simply resetting the counter to zero so that it's re-extracted on the + * next usage. + */ +void invalidate_batched_entropy(void) +{ + int cpu; + unsigned long flags; + + for_each_possible_cpu(cpu) { + struct batched_entropy *batched_entropy; + + batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu); + spin_lock_irqsave(&batched_entropy->batch_lock, flags); + batched_entropy->position = 0; + spin_unlock(&batched_entropy->batch_lock); + + batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu); + spin_lock(&batched_entropy->batch_lock); + batched_entropy->position = 0; + spin_unlock_irqrestore(&batched_entropy->batch_lock, flags); + } +} + +/* + * randomize_page - Generate a random, page aligned address + * @start: The smallest acceptable address the caller will take. + * @range: The size of the area, starting at @start, within which the + * random address must fall. + * + * If @start + @range would overflow, @range is capped. + * + * NOTE: Historical use of randomize_range, which this replaces, presumed that + * @start was already page aligned. We now align it regardless. + * + * Return: A page aligned address within [start, start + range). On error, + * @start is returned. + */ +unsigned long randomize_page(unsigned long start, unsigned long range) +{ + if (!PAGE_ALIGNED(start)) { + range -= PAGE_ALIGN(start) - start; + start = PAGE_ALIGN(start); + } + + if (start > ULONG_MAX - range) + range = ULONG_MAX - start; + + range >>= PAGE_SHIFT; + + if (range == 0) + return start; + + return start + (get_random_long() % range << PAGE_SHIFT); +} diff --git a/drivers/char/lrng/lrng_chacha20.c b/drivers/char/lrng/lrng_chacha20.c new file mode 100644 index 0000000000000..b5387bb33095e --- /dev/null +++ b/drivers/char/lrng/lrng_chacha20.c @@ -0,0 +1,321 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Backend for the LRNG providing the cryptographic primitives using + * ChaCha20 cipher implementations. 
+ * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "lrng_chacha20.h" +#include "lrng_internal.h" + +/******************************* ChaCha20 DRNG *******************************/ + +#define CHACHA_BLOCK_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32)) + +struct chacha20_state { + struct chacha20_block block; +}; + +/* + * Have a static memory block for the ChaCha20 DRNG instance to avoid calling + * kmalloc too early in the boot cycle. For subsequent allocation requests, + * such as per-NUMA-node DRNG instances, kmalloc will be used. + */ +struct chacha20_state chacha20; + +/* + * Update of the ChaCha20 state by either using an unused buffer part or by + * generating one ChaCha20 block, which is half of the ChaCha20 state. + * The block is XORed into the key part of the state. This shall ensure + * backtracking resistance as well as a proper mix of the ChaCha20 state once + * the key is injected. + */ +static void lrng_chacha20_update(struct chacha20_state *chacha20_state, + __le32 *buf, u32 used_words) +{ + struct chacha20_block *chacha20 = &chacha20_state->block; + u32 i; + __le32 tmp[CHACHA_BLOCK_WORDS]; + + BUILD_BUG_ON(sizeof(struct chacha20_block) != CHACHA_BLOCK_SIZE); + BUILD_BUG_ON(CHACHA_BLOCK_SIZE != 2 * CHACHA_KEY_SIZE); + + if (used_words > CHACHA_KEY_SIZE_WORDS) { + chacha20_block(&chacha20->constants[0], (u8 *)tmp); + for (i = 0; i < CHACHA_KEY_SIZE_WORDS; i++) + chacha20->key.u[i] ^= le32_to_cpu(tmp[i]); + memzero_explicit(tmp, sizeof(tmp)); + } else { + for (i = 0; i < CHACHA_KEY_SIZE_WORDS; i++) + chacha20->key.u[i] ^= le32_to_cpu(buf[i + used_words]); + } + + /* Deterministic increment of nonce as required in RFC 7539 chapter 4 */ + chacha20->nonce[0]++; + if (chacha20->nonce[0] == 0) { + chacha20->nonce[1]++; + if (chacha20->nonce[1] == 0) + chacha20->nonce[2]++; + } + + /* Leave the counter untouched as its start value is undefined in the RFC */ +} + +/* + * Seed the ChaCha20 DRNG by injecting the input data into the key part of + * the ChaCha20 state. If the input data is longer than the ChaCha20 key size, + * perform a ChaCha20 operation after processing each key-size chunk of input + * data. This operation shall spread out the entropy into the ChaCha20 state + * before new entropy is injected into the key part. + */ +static int lrng_cc20_drng_seed_helper(void *drng, const u8 *inbuf, u32 inbuflen) +{ + struct chacha20_state *chacha20_state = (struct chacha20_state *)drng; + struct chacha20_block *chacha20 = &chacha20_state->block; + + while (inbuflen) { + u32 i, todo = min_t(u32, inbuflen, CHACHA_KEY_SIZE); + + for (i = 0; i < todo; i++) + chacha20->key.b[i] ^= inbuf[i]; + + /* Break potential dependencies between the inbuf key blocks */ + lrng_chacha20_update(chacha20_state, NULL, + CHACHA_BLOCK_WORDS); + inbuf += todo; + inbuflen -= todo; + } + + return 0; +} + +/* + * ChaCha20 DRNG generation of random numbers: the stream output of ChaCha20 + * is the random number. After the completion of the generation of the + * stream, the entire ChaCha20 state is updated. + * + * Note, as ChaCha20 implements a 32 bit counter, we must ensure + * that this function is only invoked for at most 2^32 - 1 ChaCha20 blocks + * before a reseed or an update happens. This is ensured by the variable + * outbuflen which is a 32 bit integer defining the number of bytes to be + * generated by the ChaCha20 DRNG.
At the end of this function, an update + * operation is invoked which implies that the 32 bit counter will never be + * overflown in this implementation. + */ +static int lrng_cc20_drng_generate_helper(void *drng, u8 *outbuf, u32 outbuflen) +{ + struct chacha20_state *chacha20_state = (struct chacha20_state *)drng; + struct chacha20_block *chacha20 = &chacha20_state->block; + __le32 aligned_buf[CHACHA_BLOCK_WORDS]; + u32 ret = outbuflen, used = CHACHA_BLOCK_WORDS; + int zeroize_buf = 0; + + while (outbuflen >= CHACHA_BLOCK_SIZE) { + chacha20_block(&chacha20->constants[0], outbuf); + outbuf += CHACHA_BLOCK_SIZE; + outbuflen -= CHACHA_BLOCK_SIZE; + } + + if (outbuflen) { + chacha20_block(&chacha20->constants[0], (u8 *)aligned_buf); + memcpy(outbuf, aligned_buf, outbuflen); + used = ((outbuflen + sizeof(aligned_buf[0]) - 1) / + sizeof(aligned_buf[0])); + zeroize_buf = 1; + } + + lrng_chacha20_update(chacha20_state, aligned_buf, used); + + if (zeroize_buf) + memzero_explicit(aligned_buf, sizeof(aligned_buf)); + + return ret; +} + +void lrng_cc20_init_state(struct chacha20_state *state) +{ + lrng_cc20_init_rfc7539(&state->block); +} + +/* + * Allocation of the DRNG state + */ +static void *lrng_cc20_drng_alloc(u32 sec_strength) +{ + struct chacha20_state *state = NULL; + + if (sec_strength > CHACHA_KEY_SIZE) { + pr_err("Security strength of ChaCha20 DRNG (%u bits) lower than requested by LRNG (%u bits)\n", + CHACHA_KEY_SIZE * 8, sec_strength * 8); + return ERR_PTR(-EINVAL); + } + if (sec_strength < CHACHA_KEY_SIZE) + pr_warn("Security strength of ChaCha20 DRNG (%u bits) higher than requested by LRNG (%u bits)\n", + CHACHA_KEY_SIZE * 8, sec_strength * 8); + + state = kmalloc(sizeof(struct chacha20_state), GFP_KERNEL); + if (!state) + return ERR_PTR(-ENOMEM); + pr_debug("memory for ChaCha20 core allocated\n"); + + lrng_cc20_init_state(state); + + return state; +} + +static void lrng_cc20_drng_dealloc(void *drng) +{ + struct chacha20_state *chacha20_state = (struct chacha20_state *)drng; + + if (drng == &chacha20) { + memzero_explicit(chacha20_state, sizeof(*chacha20_state)); + pr_debug("static ChaCha20 core zeroized\n"); + return; + } + + pr_debug("ChaCha20 core zeroized and freed\n"); + kfree_sensitive(chacha20_state); +} + +/******************************* Hash Operation *******************************/ + +#ifdef CONFIG_CRYPTO_LIB_SHA256 + +#include + +static u32 lrng_cc20_hash_digestsize(void *hash) +{ + return SHA256_DIGEST_SIZE; +} + +static int lrng_cc20_hash_init(struct shash_desc *shash, void *hash) +{ + /* + * We do not need a TFM - we only need sufficient space for + * struct sha256_state on the stack. + */ + sha256_init(shash_desc_ctx(shash)); + return 0; +} + +static int lrng_cc20_hash_update(struct shash_desc *shash, + const u8 *inbuf, u32 inbuflen) +{ + sha256_update(shash_desc_ctx(shash), inbuf, inbuflen); + return 0; +} + +static int lrng_cc20_hash_final(struct shash_desc *shash, u8 *digest) +{ + sha256_final(shash_desc_ctx(shash), digest); + return 0; +} + +static const char *lrng_cc20_hash_name(void) +{ + return "SHA-256"; +} + +static void lrng_cc20_hash_desc_zero(struct shash_desc *shash) +{ + memzero_explicit(shash_desc_ctx(shash), sizeof(struct sha256_state)); +} + +#else /* CONFIG_CRYPTO_LIB_SHA256 */ + +#include +#include + +/* + * If the SHA-256 support is not compiled, we fall back to SHA-1 that is always + * compiled and present in the kernel. 
+ */ +static u32 lrng_cc20_hash_digestsize(void *hash) +{ + return SHA1_DIGEST_SIZE; +} + +static void lrng_sha1_block_fn(struct sha1_state *sctx, const u8 *src, + int blocks) +{ + u32 temp[SHA1_WORKSPACE_WORDS]; + + while (blocks--) { + sha1_transform(sctx->state, src, temp); + src += SHA1_BLOCK_SIZE; + } + memzero_explicit(temp, sizeof(temp)); +} + +static int lrng_cc20_hash_init(struct shash_desc *shash, void *hash) +{ + /* + * We do not need a TFM - we only need sufficient space for + * struct sha1_state on the stack. + */ + sha1_base_init(shash); + return 0; +} + +static int lrng_cc20_hash_update(struct shash_desc *shash, + const u8 *inbuf, u32 inbuflen) +{ + return sha1_base_do_update(shash, inbuf, inbuflen, lrng_sha1_block_fn); +} + +static int lrng_cc20_hash_final(struct shash_desc *shash, u8 *digest) +{ + return sha1_base_do_finalize(shash, lrng_sha1_block_fn) ?: + sha1_base_finish(shash, digest); +} + +static const char *lrng_cc20_hash_name(void) +{ + return "SHA-1"; +} + +static void lrng_cc20_hash_desc_zero(struct shash_desc *shash) +{ + memzero_explicit(shash_desc_ctx(shash), sizeof(struct sha1_state)); +} + +#endif /* CONFIG_CRYPTO_LIB_SHA256 */ + +static void *lrng_cc20_hash_alloc(void) +{ + pr_info("Hash %s allocated\n", lrng_cc20_hash_name()); + return NULL; +} + +static void lrng_cc20_hash_dealloc(void *hash) +{ +} + +static const char *lrng_cc20_drng_name(void) +{ + return "ChaCha20 DRNG"; +} + +const struct lrng_crypto_cb lrng_cc20_crypto_cb = { + .lrng_drng_name = lrng_cc20_drng_name, + .lrng_hash_name = lrng_cc20_hash_name, + .lrng_drng_alloc = lrng_cc20_drng_alloc, + .lrng_drng_dealloc = lrng_cc20_drng_dealloc, + .lrng_drng_seed_helper = lrng_cc20_drng_seed_helper, + .lrng_drng_generate_helper = lrng_cc20_drng_generate_helper, + .lrng_hash_alloc = lrng_cc20_hash_alloc, + .lrng_hash_dealloc = lrng_cc20_hash_dealloc, + .lrng_hash_digestsize = lrng_cc20_hash_digestsize, + .lrng_hash_init = lrng_cc20_hash_init, + .lrng_hash_update = lrng_cc20_hash_update, + .lrng_hash_final = lrng_cc20_hash_final, + .lrng_hash_desc_zero = lrng_cc20_hash_desc_zero, +}; diff --git a/drivers/char/lrng/lrng_chacha20.h b/drivers/char/lrng/lrng_chacha20.h new file mode 100644 index 0000000000000..bd0c0bee38f35 --- /dev/null +++ b/drivers/char/lrng/lrng_chacha20.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * LRNG ChaCha20 definitions + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#include + +/* State according to RFC 7539 section 2.3 */ +struct chacha20_block { + u32 constants[4]; + union { +#define CHACHA_KEY_SIZE_WORDS (CHACHA_KEY_SIZE / sizeof(u32)) + u32 u[CHACHA_KEY_SIZE_WORDS]; + u8 b[CHACHA_KEY_SIZE]; + } key; + u32 counter; + u32 nonce[3]; +}; + +static inline void lrng_cc20_init_rfc7539(struct chacha20_block *chacha20) +{ + chacha_init_consts(chacha20->constants); +} diff --git a/drivers/char/lrng/lrng_drbg.c b/drivers/char/lrng/lrng_drbg.c new file mode 100644 index 0000000000000..6ca6b05eccf4c --- /dev/null +++ b/drivers/char/lrng/lrng_drbg.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Backend for the LRNG providing the cryptographic primitives using the + * kernel crypto API and its DRBG. + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "lrng_kcapi_hash.h" + +/* + * Define a DRBG plus a hash / MAC used to extract data from the entropy pool. 
+ * For LRNG_HASH_NAME you can use a hash or a MAC (HMAC or CMAC) of your choice + * (Note, you should use the suggested selections below -- using SHA-1 or MD5 + * is not wise). The idea is that the used cipher primitive can be selected to + * be the same as used for the DRBG. I.e. the LRNG only uses one cipher + * primitive using the same cipher implementation with the options offered in + * the following. This means, if the CTR DRBG is selected and AES-NI is present, + * both the CTR DRBG and the selected cmac(aes) use AES-NI. + * + * The security strengths of the DRBGs are all 256 bits according to + * SP800-57 section 5.6.1. + * + * This definition is allowed to be changed. + */ +#ifdef CONFIG_CRYPTO_DRBG_CTR +static unsigned int lrng_drbg_type = 0; +#elif defined CONFIG_CRYPTO_DRBG_HMAC +static unsigned int lrng_drbg_type = 1; +#elif defined CONFIG_CRYPTO_DRBG_HASH +static unsigned int lrng_drbg_type = 2; +#else +#error "Unknown DRBG in use" +#endif + +/* The parameter must be r/o in sysfs as otherwise races appear. */ +module_param(lrng_drbg_type, uint, 0444); +MODULE_PARM_DESC(lrng_drbg_type, "DRBG type used for LRNG (0->CTR_DRBG, 1->HMAC_DRBG, 2->Hash_DRBG)"); + +struct lrng_drbg { + const char *hash_name; + const char *drbg_core; +}; + +static const struct lrng_drbg lrng_drbg_types[] = { + { /* CTR_DRBG with AES-256 using derivation function */ + .hash_name = "sha512", + .drbg_core = "drbg_nopr_ctr_aes256", + }, { /* HMAC_DRBG with SHA-512 */ + .hash_name = "sha512", + .drbg_core = "drbg_nopr_hmac_sha512", + }, { /* Hash_DRBG with SHA-512 using derivation function */ + .hash_name = "sha512", + .drbg_core = "drbg_nopr_sha512" + } +}; + +static int lrng_drbg_drng_seed_helper(void *drng, const u8 *inbuf, u32 inbuflen) +{ + struct drbg_state *drbg = (struct drbg_state *)drng; + LIST_HEAD(seedlist); + struct drbg_string data; + int ret; + + drbg_string_fill(&data, inbuf, inbuflen); + list_add_tail(&data.list, &seedlist); + ret = drbg->d_ops->update(drbg, &seedlist, drbg->seeded); + + if (ret >= 0) + drbg->seeded = true; + + return ret; +} + +static int lrng_drbg_drng_generate_helper(void *drng, u8 *outbuf, u32 outbuflen) +{ + struct drbg_state *drbg = (struct drbg_state *)drng; + + return drbg->d_ops->generate(drbg, outbuf, outbuflen, NULL); +} + +static void *lrng_drbg_drng_alloc(u32 sec_strength) +{ + struct drbg_state *drbg; + int coreref = -1; + bool pr = false; + int ret; + + drbg_convert_tfm_core(lrng_drbg_types[lrng_drbg_type].drbg_core, + &coreref, &pr); + if (coreref < 0) + return ERR_PTR(-EFAULT); + + drbg = kzalloc(sizeof(struct drbg_state), GFP_KERNEL); + if (!drbg) + return ERR_PTR(-ENOMEM); + + drbg->core = &drbg_cores[coreref]; + drbg->seeded = false; + ret = drbg_alloc_state(drbg); + if (ret) + goto err; + + if (sec_strength > drbg_sec_strength(drbg->core->flags)) { + pr_err("Security strength of DRBG (%u bits) lower than requested by LRNG (%u bits)\n", + drbg_sec_strength(drbg->core->flags) * 8, + sec_strength * 8); + goto dealloc; + } + + if (sec_strength < drbg_sec_strength(drbg->core->flags)) + pr_warn("Security strength of DRBG (%u bits) higher than requested by LRNG (%u bits)\n", + drbg_sec_strength(drbg->core->flags) * 8, + sec_strength * 8); + + pr_info("DRBG with %s core allocated\n", drbg->core->backend_cra_name); + + return drbg; + +dealloc: + if (drbg->d_ops) + drbg->d_ops->crypto_fini(drbg); + drbg_dealloc_state(drbg); +err: + kfree(drbg); + return ERR_PTR(-EINVAL); +} + +static void lrng_drbg_drng_dealloc(void *drng) +{ + struct drbg_state *drbg = (struct 
drbg_state *)drng; + + if (drbg && drbg->d_ops) + drbg->d_ops->crypto_fini(drbg); + drbg_dealloc_state(drbg); + kfree_sensitive(drbg); + pr_info("DRBG deallocated\n"); +} + +static void *lrng_drbg_hash_alloc(void) +{ + return lrng_kcapi_hash_alloc(lrng_drbg_types[lrng_drbg_type].hash_name); +} + +static const char *lrng_drbg_name(void) +{ + return lrng_drbg_types[lrng_drbg_type].drbg_core; +} + +static const char *lrng_hash_name(void) +{ + return lrng_drbg_types[lrng_drbg_type].hash_name; +} + +static const struct lrng_crypto_cb lrng_drbg_crypto_cb = { + .lrng_drng_name = lrng_drbg_name, + .lrng_hash_name = lrng_hash_name, + .lrng_drng_alloc = lrng_drbg_drng_alloc, + .lrng_drng_dealloc = lrng_drbg_drng_dealloc, + .lrng_drng_seed_helper = lrng_drbg_drng_seed_helper, + .lrng_drng_generate_helper = lrng_drbg_drng_generate_helper, + .lrng_hash_alloc = lrng_drbg_hash_alloc, + .lrng_hash_dealloc = lrng_kcapi_hash_dealloc, + .lrng_hash_digestsize = lrng_kcapi_hash_digestsize, + .lrng_hash_init = lrng_kcapi_hash_init, + .lrng_hash_update = lrng_kcapi_hash_update, + .lrng_hash_final = lrng_kcapi_hash_final, + .lrng_hash_desc_zero = lrng_kcapi_hash_zero, +}; + +static int __init lrng_drbg_init(void) +{ + if (lrng_drbg_type >= ARRAY_SIZE(lrng_drbg_types)) { + pr_err("lrng_drbg_type parameter too large (given %u - max: %lu)", + lrng_drbg_type, + (unsigned long)ARRAY_SIZE(lrng_drbg_types) - 1); + return -EAGAIN; + } + return lrng_set_drng_cb(&lrng_drbg_crypto_cb); +} + +static void __exit lrng_drbg_exit(void) +{ + lrng_set_drng_cb(NULL); +} + +late_initcall(lrng_drbg_init); +module_exit(lrng_drbg_exit); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller "); +MODULE_DESCRIPTION("Linux Random Number Generator - SP800-90A DRBG backend"); diff --git a/drivers/char/lrng/lrng_drng.c b/drivers/char/lrng/lrng_drng.c new file mode 100644 index 0000000000000..b1d89a6f548d7 --- /dev/null +++ b/drivers/char/lrng/lrng_drng.c @@ -0,0 +1,451 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG DRNG processing + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#include "lrng_internal.h" + +/* + * Maximum number of seconds between reseed operations of the DRNG. Note, + * this is enforced with the next request of random numbers from the + * DRNG. Setting this value to zero implies a reseeding attempt before every + * generated random number. + */ +int lrng_drng_reseed_max_time = 600; + +static atomic_t lrng_avail = ATOMIC_INIT(0); + +DEFINE_MUTEX(lrng_crypto_cb_update); + +/* DRNG for /dev/urandom, getrandom(2), get_random_bytes */ +static struct lrng_drng lrng_drng_init = { + .drng = &chacha20, + .crypto_cb = &lrng_cc20_crypto_cb, + .lock = __MUTEX_INITIALIZER(lrng_drng_init.lock), + .spin_lock = __SPIN_LOCK_UNLOCKED(lrng_drng_init.spin_lock), + .hash_lock = __RW_LOCK_UNLOCKED(lrng_drng_init.hash_lock) +}; + +/* + * DRNG for get_random_bytes when called in atomic context. This + * DRNG will always use the ChaCha20 DRNG. It will never benefit from a + * DRNG switch like the "regular" DRNG. If there was no DRNG switch, the atomic + * DRNG is identical to the "regular" DRNG. + * + * The reason for having this is that DRNGs other than + * the ChaCha20 DRNG may sleep.
+ */ +static struct lrng_drng lrng_drng_atomic = { + .drng = &chacha20, + .crypto_cb = &lrng_cc20_crypto_cb, + .spin_lock = __SPIN_LOCK_UNLOCKED(lrng_drng_atomic.spin_lock), + .hash_lock = __RW_LOCK_UNLOCKED(lrng_drng_atomic.hash_lock) +}; + +static u32 max_wo_reseed = LRNG_DRNG_MAX_WITHOUT_RESEED; +#ifdef CONFIG_LRNG_RUNTIME_MAX_WO_RESEED_CONFIG +module_param(max_wo_reseed, uint, 0444); +MODULE_PARM_DESC(max_wo_reseed, + "Maximum number of DRNG generate operations without full reseed"); +#endif + +/********************************** Helper ************************************/ + +bool lrng_get_available(void) +{ + return likely(atomic_read(&lrng_avail)); +} + +void lrng_set_available(void) +{ + atomic_set(&lrng_avail, 1); +} + +struct lrng_drng *lrng_drng_init_instance(void) +{ + return &lrng_drng_init; +} + +struct lrng_drng *lrng_drng_atomic_instance(void) +{ + return &lrng_drng_atomic; +} + +void lrng_drng_reset(struct lrng_drng *drng) +{ + atomic_set(&drng->requests, LRNG_DRNG_RESEED_THRESH); + atomic_set(&drng->requests_since_fully_seeded, 0); + drng->last_seeded = jiffies; + drng->fully_seeded = false; + drng->force_reseed = true; + pr_debug("reset DRNG\n"); +} + +/* Initialize the default DRNG during boot */ +static void lrng_drng_seed(struct lrng_drng *drng); +void lrng_drngs_init_cc20(bool force_seed) +{ + unsigned long flags = 0; + + if (lrng_get_available()) + return; + + lrng_drng_lock(&lrng_drng_init, &flags); + if (lrng_get_available()) { + lrng_drng_unlock(&lrng_drng_init, &flags); + if (force_seed) + goto seed; + return; + } + + lrng_drng_reset(&lrng_drng_init); + lrng_cc20_init_state(&chacha20); + lrng_drng_unlock(&lrng_drng_init, &flags); + + lrng_drng_lock(&lrng_drng_atomic, &flags); + lrng_drng_reset(&lrng_drng_atomic); + /* + * We do not initialize the state of the atomic DRNG as it is identical + * to the DRNG at this point. + */ + lrng_drng_unlock(&lrng_drng_atomic, &flags); + + lrng_set_available(); + +seed: + /* Seed the DRNG with any entropy available */ + if (!lrng_pool_trylock()) { + lrng_drng_seed(&lrng_drng_init); + pr_info("ChaCha20 core initialized with first seeding\n"); + lrng_pool_unlock(); + } else { + pr_info("ChaCha20 core initialized without seeding\n"); + } +} + +bool lrng_sp80090c_compliant(void) +{ + if (!IS_ENABLED(CONFIG_LRNG_OVERSAMPLE_ENTROPY_SOURCES)) + return false; + + /* Entropy source hash must be capable of transporting enough entropy */ + if (lrng_get_digestsize() < + (lrng_security_strength() + CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS)) + return false; + + /* SP800-90C only requested in FIPS mode */ + return fips_enabled; +} + +/************************* Random Number Generation ***************************/ + +/* Inject a data buffer into the DRNG */ +static void lrng_drng_inject(struct lrng_drng *drng, + const u8 *inbuf, u32 inbuflen, bool fully_seeded) +{ + const char *drng_type = unlikely(drng == &lrng_drng_atomic) ? + "atomic" : "regular"; + unsigned long flags = 0; + + BUILD_BUG_ON(LRNG_DRNG_RESEED_THRESH > INT_MAX); + pr_debug("seeding %s DRNG with %u bytes\n", drng_type, inbuflen); + lrng_drng_lock(drng, &flags); + if (drng->crypto_cb->lrng_drng_seed_helper(drng->drng, + inbuf, inbuflen) < 0) { + pr_warn("seeding of %s DRNG failed\n", drng_type); + drng->force_reseed = true; + } else { + int gc = LRNG_DRNG_RESEED_THRESH - atomic_read(&drng->requests); + + pr_debug("%s DRNG stats since last seeding: %lu secs; generate calls: %d\n", + drng_type, + (time_after(jiffies, drng->last_seeded) ?
+ (jiffies - drng->last_seeded) : 0) / HZ, gc); + + /* Count the number of generate ops since last fully seeded */ + if (fully_seeded) + atomic_set(&drng->requests_since_fully_seeded, 0); + else + atomic_add(gc, &drng->requests_since_fully_seeded); + + drng->last_seeded = jiffies; + atomic_set(&drng->requests, LRNG_DRNG_RESEED_THRESH); + drng->force_reseed = false; + + if (!drng->fully_seeded) { + drng->fully_seeded = fully_seeded; + if (drng->fully_seeded) + pr_debug("DRNG fully seeded\n"); + } + + if (drng->drng == lrng_drng_atomic.drng) { + lrng_drng_atomic.last_seeded = jiffies; + atomic_set(&lrng_drng_atomic.requests, + LRNG_DRNG_RESEED_THRESH); + lrng_drng_atomic.force_reseed = false; + } + } + lrng_drng_unlock(drng, &flags); +} + +/* + * Perform the seeding of the DRNG with data from the noise sources + */ +static void _lrng_drng_seed(struct lrng_drng *drng) +{ + struct entropy_buf seedbuf __aligned(LRNG_KCAPI_ALIGN); + + lrng_fill_seed_buffer(&seedbuf, + lrng_get_seed_entropy_osr(drng->fully_seeded)); + lrng_init_ops(&seedbuf); + lrng_drng_inject(drng, (u8 *)&seedbuf, sizeof(seedbuf), + lrng_fully_seeded(drng->fully_seeded, &seedbuf)); + memzero_explicit(&seedbuf, sizeof(seedbuf)); +} + +static int lrng_drng_get(struct lrng_drng *drng, u8 *outbuf, u32 outbuflen); +static void lrng_drng_seed(struct lrng_drng *drng) +{ + _lrng_drng_seed(drng); + + BUILD_BUG_ON(LRNG_MIN_SEED_ENTROPY_BITS > + LRNG_DRNG_SECURITY_STRENGTH_BITS); + + /* + * Reseed the atomic DRNG from the current DRNG. + * + * We can obtain random numbers from the DRNG as the lock type + * chosen by lrng_drng_get is usable with the current caller. + */ + if ((drng->drng != lrng_drng_atomic.drng) && + (lrng_drng_atomic.force_reseed || + atomic_read(&lrng_drng_atomic.requests) <= 0 || + time_after(jiffies, lrng_drng_atomic.last_seeded + + lrng_drng_reseed_max_time * HZ))) { + u8 seedbuf[LRNG_DRNG_SECURITY_STRENGTH_BYTES] + __aligned(LRNG_KCAPI_ALIGN); + int ret = lrng_drng_get(drng, seedbuf, sizeof(seedbuf)); + + if (ret < 0) { + pr_warn("Error generating random numbers for atomic DRNG: %d\n", + ret); + } else { + lrng_drng_inject(&lrng_drng_atomic, seedbuf, ret, true); + } + memzero_explicit(&seedbuf, sizeof(seedbuf)); + } +} + +static void _lrng_drng_seed_work(struct lrng_drng *drng, u32 node) +{ + pr_debug("reseed triggered by interrupt noise source for DRNG on NUMA node %d\n", + node); + lrng_drng_seed(drng); + if (drng->fully_seeded) { + /* Prevent reseed storm */ + drng->last_seeded += node * 100 * HZ; + /* Prevent draining of pool on idle systems */ + lrng_drng_reseed_max_time += 100; + } +} + +/* + * DRNG reseed trigger: Kernel thread handler triggered by the schedule_work() + */ +void lrng_drng_seed_work(struct work_struct *dummy) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + u32 node; + + if (lrng_drng) { + for_each_online_node(node) { + struct lrng_drng *drng = lrng_drng[node]; + + if (drng && !drng->fully_seeded) { + _lrng_drng_seed_work(drng, node); + goto out; + } + } + } else { + if (!lrng_drng_init.fully_seeded) { + _lrng_drng_seed_work(&lrng_drng_init, 0); + goto out; + } + } + + lrng_pool_all_numa_nodes_seeded(true); + +out: + /* Allow the seeding operation to be called again */ + lrng_pool_unlock(); +} + +/* Force all DRNGs to reseed before next generation */ +void lrng_drng_force_reseed(void) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + u32 node; + + /* + * If the initial DRNG is over the reseed threshold, allow a forced + * reseed only for the initial DRNG as this is the fallback for all.
It + * must be kept seeded before all others to keep the LRNG operational. + */ + if (!lrng_drng || + (atomic_read_u32(&lrng_drng_init.requests_since_fully_seeded) > + LRNG_DRNG_RESEED_THRESH)) { + lrng_drng_init.force_reseed = lrng_drng_init.fully_seeded; + pr_debug("force reseed of initial DRNG\n"); + return; + } + for_each_online_node(node) { + struct lrng_drng *drng = lrng_drng[node]; + + if (!drng) + continue; + + drng->force_reseed = drng->fully_seeded; + pr_debug("force reseed of DRNG on node %u\n", node); + } + lrng_drng_atomic.force_reseed = lrng_drng_atomic.fully_seeded; +} + +/* + * lrng_drng_get() - Get random data out of the DRNG which is reseeded + * frequently. + * + * @outbuf: buffer for storing random data + * @outbuflen: length of outbuf + * + * Return: + * * < 0 in error case (DRNG generation or update failed) + * * >=0 returning the returned number of bytes + */ +static int lrng_drng_get(struct lrng_drng *drng, u8 *outbuf, u32 outbuflen) +{ + unsigned long flags = 0; + u32 processed = 0; + + if (!outbuf || !outbuflen) + return 0; + + outbuflen = min_t(size_t, outbuflen, INT_MAX); + + lrng_drngs_init_cc20(false); + + /* If DRNG operated without proper reseed for too long, block LRNG */ + BUILD_BUG_ON(LRNG_DRNG_MAX_WITHOUT_RESEED < LRNG_DRNG_RESEED_THRESH); + if (atomic_read_u32(&drng->requests_since_fully_seeded) > max_wo_reseed) + lrng_unset_fully_seeded(drng); + + while (outbuflen) { + u32 todo = min_t(u32, outbuflen, LRNG_DRNG_MAX_REQSIZE); + int ret; + + /* All but the atomic DRNG are seeded during generation */ + if (atomic_dec_and_test(&drng->requests) || + drng->force_reseed || + time_after(jiffies, drng->last_seeded + + lrng_drng_reseed_max_time * HZ)) { + if (likely(drng != &lrng_drng_atomic)) { + if (lrng_pool_trylock()) { + drng->force_reseed = true; + } else { + lrng_drng_seed(drng); + lrng_pool_unlock(); + } + } + } + + lrng_drng_lock(drng, &flags); + ret = drng->crypto_cb->lrng_drng_generate_helper( + drng->drng, outbuf + processed, todo); + lrng_drng_unlock(drng, &flags); + if (ret <= 0) { + pr_warn("getting random data from DRNG failed (%d)\n", + ret); + return -EFAULT; + } + processed += ret; + outbuflen -= ret; + } + + return processed; +} + +int lrng_drng_get_atomic(u8 *outbuf, u32 outbuflen) +{ + return lrng_drng_get(&lrng_drng_atomic, outbuf, outbuflen); +} + +int lrng_drng_get_sleep(u8 *outbuf, u32 outbuflen) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + struct lrng_drng *drng = &lrng_drng_init; + int node = numa_node_id(); + + might_sleep(); + + if (lrng_drng && lrng_drng[node] && lrng_drng[node]->fully_seeded) + drng = lrng_drng[node]; + + return lrng_drng_get(drng, outbuf, outbuflen); +} + +/* Reset LRNG such that all existing entropy is gone */ +static void _lrng_reset(struct work_struct *work) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + unsigned long flags = 0; + + if (!lrng_drng) { + lrng_drng_lock(&lrng_drng_init, &flags); + lrng_drng_reset(&lrng_drng_init); + lrng_drng_unlock(&lrng_drng_init, &flags); + } else { + u32 node; + + for_each_online_node(node) { + struct lrng_drng *drng = lrng_drng[node]; + + if (!drng) + continue; + lrng_drng_lock(drng, &flags); + lrng_drng_reset(drng); + lrng_drng_unlock(drng, &flags); + } + } + lrng_set_entropy_thresh(LRNG_INIT_ENTROPY_BITS); + + lrng_reset_state(); +} + +static DECLARE_WORK(lrng_reset_work, _lrng_reset); + +void lrng_reset(void) +{ + schedule_work(&lrng_reset_work); +} + +/***************************** Initialize LRNG *******************************/ + +static 
int __init lrng_init(void) +{ + lrng_drngs_init_cc20(false); + + lrng_drngs_numa_alloc(); + return 0; +} + +late_initcall(lrng_init); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller "); +MODULE_DESCRIPTION("Linux Random Number Generator"); diff --git a/drivers/char/lrng/lrng_es_archrandom.c b/drivers/char/lrng/lrng_es_archrandom.c new file mode 100644 index 0000000000000..337f84fab1201 --- /dev/null +++ b/drivers/char/lrng/lrng_es_archrandom.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Fast Entropy Source: CPU-based entropy source + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include "lrng_internal.h" + +/* + * Estimated entropy of data is a 32th of LRNG_DRNG_SECURITY_STRENGTH_BITS. + * As we have no ability to review the implementation of those noise sources, + * it is prudent to have a conservative estimate here. + */ +#define LRNG_ARCHRANDOM_DEFAULT_STRENGTH CONFIG_LRNG_CPU_ENTROPY_RATE +#define LRNG_ARCHRANDOM_TRUST_CPU_STRENGTH LRNG_DRNG_SECURITY_STRENGTH_BITS +#ifdef CONFIG_RANDOM_TRUST_CPU +static u32 archrandom = LRNG_ARCHRANDOM_TRUST_CPU_STRENGTH; +#else +static u32 archrandom = LRNG_ARCHRANDOM_DEFAULT_STRENGTH; +#endif +#ifdef CONFIG_LRNG_RUNTIME_ES_CONFIG +module_param(archrandom, uint, 0644); +MODULE_PARM_DESC(archrandom, "Entropy in bits of 256 data bits from CPU noise source (e.g. RDSEED)"); +#endif + +static int __init lrng_parse_trust_cpu(char *arg) +{ + int ret; + bool trust_cpu = false; + + ret = kstrtobool(arg, &trust_cpu); + if (ret) + return ret; + + if (trust_cpu) { + archrandom = LRNG_ARCHRANDOM_TRUST_CPU_STRENGTH; + lrng_pool_add_entropy(); + } else { + archrandom = LRNG_ARCHRANDOM_DEFAULT_STRENGTH; + } + + return 0; +} +early_param("random.trust_cpu", lrng_parse_trust_cpu); + +u32 lrng_archrandom_entropylevel(u32 requested_bits) +{ + return lrng_fast_noise_entropylevel(archrandom, requested_bits); +} + +static u32 lrng_get_arch_data(u8 *outbuf, u32 requested_bits) +{ + u32 i; + + /* operate on full blocks */ + BUILD_BUG_ON(LRNG_DRNG_SECURITY_STRENGTH_BYTES % sizeof(unsigned long)); + BUILD_BUG_ON(CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS % + sizeof(unsigned long)); + /* ensure we have aligned buffers */ + BUILD_BUG_ON(LRNG_KCAPI_ALIGN % sizeof(unsigned long)); + + for (i = 0; i < (requested_bits >> 3); + i += sizeof(unsigned long)) { + if (!arch_get_random_seed_long((unsigned long *)(outbuf + i)) && + !arch_get_random_long((unsigned long *)(outbuf + i))) { + archrandom = 0; + return 0; + } + } + + return requested_bits; +} + +static u32 lrng_get_arch_data_compress(u8 *outbuf, u32 requested_bits, + u32 data_multiplier) +{ + SHASH_DESC_ON_STACK(shash, NULL); + const struct lrng_crypto_cb *crypto_cb; + struct lrng_drng *drng = lrng_drng_init_instance(); + unsigned long flags; + u32 ent_bits = 0, i, partial_bits = 0, + full_bits = requested_bits * data_multiplier; + void *hash; + + /* Calculate oversampling for SP800-90C */ + if (lrng_sp80090c_compliant()) { + /* Complete amount of bits to be pulled */ + full_bits += CONFIG_LRNG_OVERSAMPLE_ES_BITS * data_multiplier; + /* Full blocks that will be pulled */ + data_multiplier = full_bits / requested_bits; + /* Partial block in bits to be pulled */ + partial_bits = full_bits - (data_multiplier * requested_bits); + } + + lrng_hash_lock(drng, &flags); + crypto_cb = drng->crypto_cb; + hash = drng->hash; + + if (crypto_cb->lrng_hash_init(shash, hash)) + goto out; + + /* Hash all 
data from the CPU entropy source */
+	for (i = 0; i < data_multiplier; i++) {
+		ent_bits = lrng_get_arch_data(outbuf, requested_bits);
+		if (!ent_bits)
+			goto out;
+
+		if (crypto_cb->lrng_hash_update(shash, outbuf, ent_bits >> 3))
+			goto err;
+	}
+
+	/* Hash partial block, if applicable */
+	ent_bits = lrng_get_arch_data(outbuf, partial_bits);
+	if (ent_bits &&
+	    crypto_cb->lrng_hash_update(shash, outbuf, ent_bits >> 3))
+		goto err;
+
+	pr_debug("pulled %u bits from CPU RNG entropy source\n", full_bits);
+
+	/* Generate the compressed data to be returned to the caller */
+	ent_bits = crypto_cb->lrng_hash_digestsize(hash) << 3;
+	if (requested_bits < ent_bits) {
+		u8 digest[LRNG_MAX_DIGESTSIZE];
+
+		if (crypto_cb->lrng_hash_final(shash, digest))
+			goto err;
+
+		/* Truncate output data to requested size */
+		memcpy(outbuf, digest, requested_bits >> 3);
+		memzero_explicit(digest, crypto_cb->lrng_hash_digestsize(hash));
+		ent_bits = requested_bits;
+	} else {
+		if (crypto_cb->lrng_hash_final(shash, outbuf))
+			goto err;
+	}
+
+out:
+	crypto_cb->lrng_hash_desc_zero(shash);
+	lrng_hash_unlock(drng, flags);
+	return ent_bits;
+
+err:
+	ent_bits = 0;
+	goto out;
+}
+
+/*
+ * If the CPU entropy source does not return full entropy, return the
+ * multiplier of how much data shall be sampled from it.
+ */
+static u32 lrng_arch_multiplier(void)
+{
+	static u32 data_multiplier = 0;
+	unsigned long v;
+
+	if (data_multiplier > 0)
+		return data_multiplier;
+
+	if (IS_ENABLED(CONFIG_X86) && !arch_get_random_seed_long(&v)) {
+		/*
+		 * Intel SPEC: pulling 512 blocks from RDRAND ensures
+		 * one reseed, making it logically equivalent to RDSEED.
+		 */
+		data_multiplier = 512;
+	} else if (IS_ENABLED(CONFIG_PPC)) {
+		/*
+		 * PowerISA defines DARN to deliver at least 0.5 bits of
+		 * entropy per data bit.
+		 */
+		data_multiplier = 2;
+	} else if (IS_ENABLED(CONFIG_RISCV)) {
+		/*
+		 * riscv-crypto-spec-scalar-1.0.0-rc6.pdf section 4.2 defines
+		 * this requirement.
+		 */
+		data_multiplier = 2;
+	} else {
+		/* CPU provides full entropy */
+		data_multiplier = CONFIG_LRNG_CPU_FULL_ENT_MULTIPLIER;
+	}
+	return data_multiplier;
+}
+
+/*
+ * lrng_get_arch() - Get CPU entropy source entropy
+ *
+ * @outbuf: buffer to store entropy of size requested_bits
+ *
+ * Return:
+ * * > 0 on success where value provides the added entropy in bits
+ * * 0 if no fast source was available
+ */
+u32 lrng_get_arch(u8 *outbuf, u32 requested_bits)
+{
+	u32 ent_bits, data_multiplier = lrng_arch_multiplier();
+
+	if (data_multiplier <= 1) {
+		ent_bits = lrng_get_arch_data(outbuf, requested_bits);
+	} else {
+		ent_bits = lrng_get_arch_data_compress(outbuf, requested_bits,
+						       data_multiplier);
+	}
+
+	ent_bits = lrng_archrandom_entropylevel(ent_bits);
+	pr_debug("obtained %u bits of entropy from CPU RNG entropy source\n",
+		 ent_bits);
+	return ent_bits;
+}
+
+void lrng_arch_es_state(unsigned char *buf, size_t buflen)
+{
+	const struct lrng_drng *lrng_drng_init = lrng_drng_init_instance();
+	u32 data_multiplier = lrng_arch_multiplier();
+
+	/* Assume the lrng_drng_init lock is taken by caller */
+	snprintf(buf, buflen,
+		 "CPU ES properties:\n"
+		 " Hash for compressing data: %s\n"
+		 " Data multiplier: %u\n",
+		 (data_multiplier <= 1) ?
+ "N/A" : lrng_drng_init->crypto_cb->lrng_hash_name(), + data_multiplier); +} diff --git a/drivers/char/lrng/lrng_es_aux.c b/drivers/char/lrng/lrng_es_aux.c new file mode 100644 index 0000000000000..c8a040d29f78e --- /dev/null +++ b/drivers/char/lrng/lrng_es_aux.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Slow Entropy Source: Auxiliary entropy pool + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include + +#include "lrng_internal.h" + +/* + * This is the auxiliary pool + * + * The aux pool array is aligned to 8 bytes to comfort the kernel crypto API + * cipher implementations of the hash functions used to read the pool: for some + * accelerated implementations, we need an alignment to avoid a realignment + * which involves memcpy(). The alignment to 8 bytes should satisfy all crypto + * implementations. + */ +struct lrng_pool { + u8 aux_pool[LRNG_POOL_SIZE]; /* Aux pool: digest state */ + atomic_t aux_entropy_bits; + atomic_t digestsize; /* Digest size of used hash */ + bool initialized; /* Aux pool initialized? */ + + /* Serialize read of entropy pool and update of aux pool */ + spinlock_t lock; +}; + +static struct lrng_pool lrng_pool __aligned(LRNG_KCAPI_ALIGN) = { + .aux_entropy_bits = ATOMIC_INIT(0), + .digestsize = ATOMIC_INIT(LRNG_ATOMIC_DIGEST_SIZE), + .initialized = false, + .lock = __SPIN_LOCK_UNLOCKED(lrng_pool.lock) +}; + +/********************************** Helper ***********************************/ + +/* Entropy in bits present in aux pool */ +u32 lrng_avail_aux_entropy(void) +{ + /* Cap available entropy with max entropy */ + u32 avail_bits = min_t(u32, lrng_get_digestsize(), + atomic_read_u32(&lrng_pool.aux_entropy_bits)); + + /* Consider oversampling rate due to aux pool conditioning */ + return lrng_reduce_by_osr(avail_bits); +} + +/* Set the digest size of the used hash in bytes */ +static void lrng_set_digestsize(u32 digestsize) +{ + struct lrng_pool *pool = &lrng_pool; + u32 ent_bits = atomic_xchg_relaxed(&pool->aux_entropy_bits, 0), + old_digestsize = lrng_get_digestsize(); + + atomic_set(&lrng_pool.digestsize, digestsize); + + /* + * Update the /proc/.../write_wakeup_threshold which must not be larger + * than the digest size of the curent conditioning hash. + */ + digestsize <<= 3; + lrng_proc_update_max_write_thresh(digestsize); + if (lrng_write_wakeup_bits > digestsize) + lrng_write_wakeup_bits = digestsize; + + /* + * In case the new digest is larger than the old one, cap the available + * entropy to the old message digest used to process the existing data. + */ + ent_bits = min_t(u32, ent_bits, old_digestsize); + atomic_add(ent_bits, &pool->aux_entropy_bits); +} + +/* Obtain the digest size provided by the used hash in bits */ +u32 lrng_get_digestsize(void) +{ + return atomic_read_u32(&lrng_pool.digestsize) << 3; +} + +/* Set entropy content in user-space controllable aux pool */ +void lrng_pool_set_entropy(u32 entropy_bits) +{ + atomic_set(&lrng_pool.aux_entropy_bits, entropy_bits); +} + +/* + * Replace old with new hash for auxiliary pool handling + * + * Assumption: the caller must guarantee that the new_cb is available during the + * entire operation (e.g. it must hold the write lock against pointer updating). 
+ */ +int lrng_aux_switch_hash(const struct lrng_crypto_cb *new_cb, void *new_hash, + const struct lrng_crypto_cb *old_cb) +{ + struct lrng_pool *pool = &lrng_pool; + struct shash_desc *shash = (struct shash_desc *)pool->aux_pool; + u8 digest[LRNG_MAX_DIGESTSIZE]; + int ret; + + if (!IS_ENABLED(CONFIG_LRNG_DRNG_SWITCH)) + return -EOPNOTSUPP; + + if (unlikely(!pool->initialized)) + return 0; + + /* Get the aux pool hash with old digest ... */ + ret = old_cb->lrng_hash_final(shash, digest) ?: + /* ... re-initialize the hash with the new digest ... */ + new_cb->lrng_hash_init(shash, new_hash) ?: + /* + * ... feed the old hash into the new state. We may feed + * uninitialized memory into the new state, but this is + * considered no issue and even good as we have some more + * uncertainty here. + */ + new_cb->lrng_hash_update(shash, digest, sizeof(digest)); + if (!ret) { + lrng_set_digestsize(new_cb->lrng_hash_digestsize(new_hash)); + pr_debug("Re-initialize aux entropy pool with hash %s\n", + new_cb->lrng_hash_name()); + } + + memzero_explicit(digest, sizeof(digest)); + return ret; +} + +/* Insert data into auxiliary pool by using the hash update function. */ +static int +lrng_pool_insert_aux_locked(const u8 *inbuf, u32 inbuflen, u32 entropy_bits) +{ + struct lrng_pool *pool = &lrng_pool; + struct shash_desc *shash = (struct shash_desc *)pool->aux_pool; + struct lrng_drng *drng = lrng_drng_init_instance(); + const struct lrng_crypto_cb *crypto_cb; + unsigned long flags; + void *hash; + int ret; + + entropy_bits = min_t(u32, entropy_bits, inbuflen << 3); + + lrng_hash_lock(drng, &flags); + + crypto_cb = drng->crypto_cb; + hash = drng->hash; + + if (unlikely(!pool->initialized)) { + ret = crypto_cb->lrng_hash_init(shash, hash); + if (ret) + goto out; + pool->initialized = true; + } + + ret = crypto_cb->lrng_hash_update(shash, inbuf, inbuflen); + if (ret) + goto out; + + /* + * Cap the available entropy to the hash output size compliant to + * SP800-90B section 3.1.5.1 table 1. + */ + entropy_bits += atomic_read_u32(&pool->aux_entropy_bits); + atomic_set(&pool->aux_entropy_bits, + min_t(u32, entropy_bits, + crypto_cb->lrng_hash_digestsize(hash) << 3)); + +out: + lrng_hash_unlock(drng, flags); + return ret; +} + +int lrng_pool_insert_aux(const u8 *inbuf, u32 inbuflen, u32 entropy_bits) +{ + struct lrng_pool *pool = &lrng_pool; + unsigned long flags; + int ret; + + spin_lock_irqsave(&pool->lock, flags); + ret = lrng_pool_insert_aux_locked(inbuf, inbuflen, entropy_bits); + spin_unlock_irqrestore(&pool->lock, flags); + + lrng_pool_add_entropy(); + + return ret; +} + +/************************* Get data from entropy pool *************************/ + +/* + * Get auxiliary entropy pool and its entropy content for seed buffer. + * Caller must hold lrng_pool.pool->lock. + * @outbuf: buffer to store data in with size requested_bits + * @requested_bits: Requested amount of entropy + * @return: amount of entropy in outbuf in bits. 
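+ * The value is already reduced by the oversampling rate and therefore
+ * never exceeds the requested amount.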
+ */ +static u32 lrng_get_aux_pool(u8 *outbuf, u32 requested_bits) +{ + struct lrng_pool *pool = &lrng_pool; + struct shash_desc *shash = (struct shash_desc *)pool->aux_pool; + struct lrng_drng *drng = lrng_drng_init_instance(); + const struct lrng_crypto_cb *crypto_cb; + unsigned long flags; + void *hash; + u32 collected_ent_bits, returned_ent_bits, unused_bits = 0, + digestsize, requested_bits_osr; + u8 aux_output[LRNG_MAX_DIGESTSIZE]; + + if (unlikely(!pool->initialized)) + return 0; + + lrng_hash_lock(drng, &flags); + + crypto_cb = drng->crypto_cb; + hash = drng->hash; + digestsize = crypto_cb->lrng_hash_digestsize(hash); + + /* Ensure that no more than the size of aux_pool can be requested */ + requested_bits = min_t(u32, requested_bits, (LRNG_MAX_DIGESTSIZE << 3)); + requested_bits_osr = requested_bits + lrng_compress_osr(); + + /* Cap entropy with entropy counter from aux pool and the used digest */ + collected_ent_bits = min_t(u32, digestsize << 3, + atomic_xchg_relaxed(&pool->aux_entropy_bits, 0)); + + /* We collected too much entropy and put the overflow back */ + if (collected_ent_bits > requested_bits_osr) { + /* Amount of bits we collected too much */ + unused_bits = collected_ent_bits - requested_bits_osr; + /* Put entropy back */ + atomic_add(unused_bits, &pool->aux_entropy_bits); + /* Fix collected entropy */ + collected_ent_bits = requested_bits_osr; + } + + /* Apply oversampling: discount requested oversampling rate */ + returned_ent_bits = lrng_reduce_by_osr(collected_ent_bits); + + pr_debug("obtained %u bits by collecting %u bits of entropy from aux pool, %u bits of entropy remaining\n", + returned_ent_bits, collected_ent_bits, unused_bits); + + /* Get the digest for the aux pool to be returned to the caller ... */ + if (crypto_cb->lrng_hash_final(shash, aux_output) || + /* + * ... and re-initialize the aux state. Do not add the aux pool + * digest for backward secrecy as it will be added with the + * insertion of the complete seed buffer after it has been filled. + */ + crypto_cb->lrng_hash_init(shash, hash)) { + returned_ent_bits = 0; + } else { + /* + * Do not truncate the output size exactly to collected_ent_bits + * as the aux pool may contain data that is not credited with + * entropy, but we want to use them to stir the DRNG state. 
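+ * Hence the copy is truncated to the requested size rather than to the
+ * credited entropy.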
+ */ + memcpy(outbuf, aux_output, requested_bits >> 3); + } + + lrng_hash_unlock(drng, flags); + memzero_explicit(aux_output, digestsize); + return returned_ent_bits; +} + +void lrng_get_backtrack_aux(struct entropy_buf *entropy_buf, u32 requested_bits) +{ + struct lrng_pool *pool = &lrng_pool; + unsigned long flags; + + /* Ensure aux pool extraction and backtracking op are atomic */ + spin_lock_irqsave(&pool->lock, flags); + + entropy_buf->a_bits = lrng_get_aux_pool(entropy_buf->a, requested_bits); + + /* Mix the extracted data back into pool for backtracking resistance */ + if (lrng_pool_insert_aux_locked((u8 *)entropy_buf, + sizeof(struct entropy_buf), 0)) + pr_warn("Backtracking resistance operation failed\n"); + + spin_unlock_irqrestore(&pool->lock, flags); +} + +void lrng_aux_es_state(unsigned char *buf, size_t buflen) +{ + const struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + + /* Assume the lrng_drng_init lock is taken by caller */ + snprintf(buf, buflen, + "Auxiliary ES properties:\n" + " Hash for operating entropy pool: %s\n", + lrng_drng_init->crypto_cb->lrng_hash_name()); +} diff --git a/drivers/char/lrng/lrng_es_irq.c b/drivers/char/lrng/lrng_es_irq.c new file mode 100644 index 0000000000000..6b13ff97d1782 --- /dev/null +++ b/drivers/char/lrng/lrng_es_irq.c @@ -0,0 +1,824 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Slow Entropy Source: Interrupt data collection + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#include "lrng_internal.h" +#include "lrng_es_irq.h" + +/* + * Number of interrupts to be recorded to assume that DRNG security strength + * bits of entropy are received. + * Note: a value below the DRNG security strength should not be defined as this + * may imply the DRNG can never be fully seeded in case other noise + * sources are unavailable. + */ +#define LRNG_IRQ_ENTROPY_BITS CONFIG_LRNG_IRQ_ENTROPY_RATE + + +/* Number of interrupts required for LRNG_DRNG_SECURITY_STRENGTH_BITS entropy */ +static u32 lrng_irq_entropy_bits = LRNG_IRQ_ENTROPY_BITS; +/* Is high-resolution timer present? */ +static bool lrng_irq_highres_timer = false; + +static u32 irq_entropy __read_mostly = LRNG_IRQ_ENTROPY_BITS; +#ifdef CONFIG_LRNG_RUNTIME_ES_CONFIG +module_param(irq_entropy, uint, 0444); +MODULE_PARM_DESC(irq_entropy, + "How many interrupts must be collected for obtaining 256 bits of entropy\n"); +#endif + +/* Per-CPU array holding concatenated entropy events */ +static DEFINE_PER_CPU(u32 [LRNG_DATA_ARRAY_SIZE], lrng_pcpu_array) + __aligned(LRNG_KCAPI_ALIGN); +static DEFINE_PER_CPU(u32, lrng_pcpu_array_ptr) = 0; +static DEFINE_PER_CPU(atomic_t, lrng_pcpu_array_irqs) = ATOMIC_INIT(0); + +/* + * The entropy collection is performed by executing the following steps: + * 1. fill up the per-CPU array holding the time stamps + * 2. once the per-CPU array is full, a compression of the data into + * the entropy pool is performed - this happens in interrupt context + * + * If step 2 is not desired in interrupt context, the following boolean + * needs to be set to false. This implies that old entropy data in the + * per-CPU array collected since the last DRNG reseed is overwritten with + * new entropy data instead of retaining the entropy with the compression + * operation. 
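+ * This trades retained entropy for lower interrupt-handler overhead.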
+ * + * Impact on entropy: + * + * If continuous compression is enabled, the maximum entropy that is collected + * per CPU between DRNG reseeds is equal to the digest size of the used hash. + * + * If continuous compression is disabled, the maximum number of entropy events + * that can be collected per CPU is equal to LRNG_DATA_ARRAY_SIZE. This amount + * of events is converted into an entropy statement which then represents the + * maximum amount of entropy collectible per CPU between DRNG reseeds. + */ +static bool lrng_pcpu_continuous_compression __read_mostly = + IS_ENABLED(CONFIG_LRNG_ENABLE_CONTINUOUS_COMPRESSION); + +#ifdef CONFIG_LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION +module_param(lrng_pcpu_continuous_compression, bool, 0444); +MODULE_PARM_DESC(lrng_pcpu_continuous_compression, + "Perform entropy compression if per-CPU entropy data array is full\n"); +#endif + +/* + * Per-CPU entropy pool with compressed entropy event + * + * The per-CPU entropy pool is defined as the hash state. New data is simply + * inserted into the entropy pool by performing a hash update operation. + * To read the entropy pool, a hash final must be invoked. However, before + * the entropy pool is released again after a hash final, the hash init must + * be performed. + */ +static DEFINE_PER_CPU(u8 [LRNG_POOL_SIZE], lrng_pcpu_pool) + __aligned(LRNG_KCAPI_ALIGN); +/* + * Lock to allow other CPUs to read the pool - as this is only done during + * reseed which is infrequent, this lock is hardly contended. + */ +static DEFINE_PER_CPU(spinlock_t, lrng_pcpu_lock); +static DEFINE_PER_CPU(bool, lrng_pcpu_lock_init) = false; + +/* Number of time stamps analyzed to calculate a GCD */ +#define LRNG_GCD_WINDOW_SIZE 100 +static u32 lrng_gcd_history[LRNG_GCD_WINDOW_SIZE]; +static atomic_t lrng_gcd_history_ptr = ATOMIC_INIT(-1); + +/* The common divisor for all timestamps */ +static u32 lrng_gcd_timer = 0; + +static bool lrng_gcd_tested(void) +{ + return (lrng_gcd_timer != 0); +} + +/* Set the GCD for use in IRQ ES - if 0, the GCD calculation is restarted. */ +static void _lrng_gcd_set(u32 running_gcd) +{ + lrng_gcd_timer = running_gcd; + /* Ensure that update to global variable lrng_gcd_timer is visible */ + mb(); +} + +static void lrng_gcd_set(u32 running_gcd) +{ + if (!lrng_gcd_tested()) { + _lrng_gcd_set(running_gcd); + pr_debug("Setting GCD to %u\n", running_gcd); + } +} + +u32 lrng_gcd_analyze(u32 *history, size_t nelem) +{ + u32 running_gcd = 0; + size_t i; + + /* Now perform the analysis on the accumulated time data. */ + for (i = 0; i < nelem; i++) { + /* + * NOTE: this would be the place to add more analysis on the + * appropriateness of the timer like checking the presence + * of sufficient variations in the timer. + */ + + /* + * This calculates the gcd of all the time values. that is + * gcd(time_1, time_2, ..., time_nelem) + * + * Some timers increment by a fixed (non-1) amount each step. + * This code checks for such increments, and allows the library + * to output the number of such changes have occurred. 
+ */ + running_gcd = (u32)gcd(history[i], running_gcd); + + /* Zeroize data */ + history[i] = 0; + } + + return running_gcd; +} + +static void lrng_gcd_add_value(u32 time) +{ + u32 ptr = (u32)atomic_inc_return_relaxed(&lrng_gcd_history_ptr); + + if (ptr < LRNG_GCD_WINDOW_SIZE) { + lrng_gcd_history[ptr] = time; + } else if (ptr == LRNG_GCD_WINDOW_SIZE) { + u32 gcd = lrng_gcd_analyze(lrng_gcd_history, + LRNG_GCD_WINDOW_SIZE); + + if (!gcd) + gcd = 1; + + /* + * Ensure that we have variations in the time stamp below the + * given value. This is just a safety measure to prevent the GCD + * becoming too large. + */ + if (gcd >= 1000) { + pr_warn("calculated GCD is larger than expected: %u\n", + gcd); + gcd = 1000; + } + + /* Adjust all deltas by the observed (small) common factor. */ + lrng_gcd_set(gcd); + atomic_set(&lrng_gcd_history_ptr, 0); + } +} + +/* Return boolean whether LRNG identified presence of high-resolution timer */ +static bool lrng_pool_highres_timer(void) +{ + return lrng_irq_highres_timer; +} + +/* Convert entropy in bits into number of IRQs with the same entropy content. */ +static u32 lrng_entropy_to_data(u32 entropy_bits) +{ + return ((entropy_bits * lrng_irq_entropy_bits) / + LRNG_DRNG_SECURITY_STRENGTH_BITS); +} + +/* Convert number of IRQs into entropy value. */ +static u32 lrng_data_to_entropy(u32 irqnum) +{ + return ((irqnum * LRNG_DRNG_SECURITY_STRENGTH_BITS) / + lrng_irq_entropy_bits); +} + +static bool lrng_pcpu_pool_online(int cpu) +{ + return per_cpu(lrng_pcpu_lock_init, cpu); +} + +static void lrng_pcpu_check_compression_state(void) +{ + /* One pool must hold sufficient entropy for disabled compression */ + if (!lrng_pcpu_continuous_compression) { + u32 max_ent = min_t(u32, lrng_get_digestsize(), + lrng_data_to_entropy(LRNG_DATA_NUM_VALUES)); + if (max_ent < lrng_security_strength()) { + pr_warn("Force continuous compression operation to ensure LRNG can hold enough entropy\n"); + lrng_pcpu_continuous_compression = true; + } + } +} + +static int __init lrng_init_time_source(void) +{ + /* Set a minimum number of interrupts that must be collected */ + irq_entropy = max_t(u32, LRNG_IRQ_ENTROPY_BITS, irq_entropy); + + if ((random_get_entropy() & LRNG_DATA_SLOTSIZE_MASK) || + (random_get_entropy() & LRNG_DATA_SLOTSIZE_MASK)) { + /* + * As the highres timer is identified here, previous interrupts + * obtained during boot time are treated like a lowres-timer + * would have been present. + */ + lrng_irq_highres_timer = true; + lrng_irq_entropy_bits = irq_entropy; + } else { + u32 new_entropy = irq_entropy * LRNG_IRQ_OVERSAMPLING_FACTOR; + + lrng_health_disable(); + lrng_irq_highres_timer = false; + lrng_irq_entropy_bits = (irq_entropy < new_entropy) ? + new_entropy : irq_entropy; + pr_warn("operating without high-resolution timer and applying IRQ oversampling factor %u\n", + LRNG_IRQ_OVERSAMPLING_FACTOR); + lrng_pcpu_check_compression_state(); + } + /* Ensure that changes to global variables are visible */ + mb(); + + return 0; +} +core_initcall(lrng_init_time_source); + +/* + * Reset all per-CPU pools - reset entropy estimator but leave the pool data + * that may or may not have entropy unchanged. + */ +void lrng_pcpu_reset(void) +{ + int cpu; + + /* Trigger GCD calculation anew. 
*/ + _lrng_gcd_set(0); + + for_each_online_cpu(cpu) + atomic_set(per_cpu_ptr(&lrng_pcpu_array_irqs, cpu), 0); +} + +u32 lrng_pcpu_avail_pool_size(void) +{ + u32 max_size = 0, max_pool = lrng_get_digestsize(); + int cpu; + + if (!lrng_pcpu_continuous_compression) + max_pool = min_t(u32, max_pool, LRNG_DATA_NUM_VALUES); + + for_each_online_cpu(cpu) { + if (lrng_pcpu_pool_online(cpu)) + max_size += max_pool; + } + + return max_size; +} + +/* Return entropy of unused IRQs present in all per-CPU pools. */ +u32 lrng_pcpu_avail_entropy(void) +{ + u32 digestsize_irqs, irq = 0; + int cpu; + + /* Obtain the cap of maximum numbers of IRQs we count */ + digestsize_irqs = lrng_entropy_to_data(lrng_get_digestsize()); + if (!lrng_pcpu_continuous_compression) { + /* Cap to max. number of IRQs the array can hold */ + digestsize_irqs = min_t(u32, digestsize_irqs, + LRNG_DATA_NUM_VALUES); + } + + for_each_online_cpu(cpu) { + if (!lrng_pcpu_pool_online(cpu)) + continue; + irq += min_t(u32, digestsize_irqs, + atomic_read_u32(per_cpu_ptr(&lrng_pcpu_array_irqs, + cpu))); + } + + /* Consider oversampling rate */ + return lrng_reduce_by_osr(lrng_data_to_entropy(irq)); +} + +/* + * Trigger a switch of the hash implementation for the per-CPU pool. + * + * For each per-CPU pool, obtain the message digest with the old hash + * implementation, initialize the per-CPU pool again with the new hash + * implementation and inject the message digest into the new state. + * + * Assumption: the caller must guarantee that the new_cb is available during the + * entire operation (e.g. it must hold the lock against pointer updating). + */ +int lrng_pcpu_switch_hash(int node, + const struct lrng_crypto_cb *new_cb, void *new_hash, + const struct lrng_crypto_cb *old_cb) +{ + u8 digest[LRNG_MAX_DIGESTSIZE]; + u32 digestsize_irqs, found_irqs; + int ret = 0, cpu; + + if (!IS_ENABLED(CONFIG_LRNG_DRNG_SWITCH)) + return -EOPNOTSUPP; + + for_each_online_cpu(cpu) { + struct shash_desc *pcpu_shash; + + /* + * Only switch the per-CPU pools for the current node because + * the crypto_cb only applies NUMA-node-wide. + */ + if (cpu_to_node(cpu) != node || !lrng_pcpu_pool_online(cpu)) + continue; + + pcpu_shash = (struct shash_desc *)per_cpu_ptr(lrng_pcpu_pool, + cpu); + + digestsize_irqs = old_cb->lrng_hash_digestsize(pcpu_shash); + digestsize_irqs = lrng_entropy_to_data(digestsize_irqs << 3); + + if (pcpu_shash->tfm == new_hash) + continue; + + /* Get the per-CPU pool hash with old digest ... */ + ret = old_cb->lrng_hash_final(pcpu_shash, digest) ?: + /* ... re-initialize the hash with the new digest ... */ + new_cb->lrng_hash_init(pcpu_shash, new_hash) ?: + /* + * ... feed the old hash into the new state. We may feed + * uninitialized memory into the new state, but this is + * considered no issue and even good as we have some more + * uncertainty here. + */ + new_cb->lrng_hash_update(pcpu_shash, digest, + sizeof(digest)); + if (ret) + goto out; + + /* + * In case the new digest is larger than the old one, cap + * the available entropy to the old message digest used to + * process the existing data. 
+ */ + found_irqs = atomic_xchg_relaxed( + per_cpu_ptr(&lrng_pcpu_array_irqs, cpu), 0); + found_irqs = min_t(u32, found_irqs, digestsize_irqs); + atomic_add_return_relaxed(found_irqs, + per_cpu_ptr(&lrng_pcpu_array_irqs, cpu)); + + pr_debug("Re-initialize per-CPU entropy pool for CPU %d on NUMA node %d with hash %s\n", + cpu, node, new_cb->lrng_hash_name()); + } + +out: + memzero_explicit(digest, sizeof(digest)); + return ret; +} + +/* + * When reading the per-CPU message digest, make sure we use the crypto + * callbacks defined for the NUMA node the per-CPU pool is defined for because + * the LRNG crypto switch support is only atomic per NUMA node. + */ +static u32 +lrng_pcpu_pool_hash_one(const struct lrng_crypto_cb *pcpu_crypto_cb, + void *pcpu_hash, int cpu, u8 *digest, u32 *digestsize) +{ + struct shash_desc *pcpu_shash = + (struct shash_desc *)per_cpu_ptr(lrng_pcpu_pool, cpu); + spinlock_t *lock = per_cpu_ptr(&lrng_pcpu_lock, cpu); + unsigned long flags; + u32 digestsize_irqs, found_irqs; + + /* Lock guarding against reading / writing to per-CPU pool */ + spin_lock_irqsave(lock, flags); + + *digestsize = pcpu_crypto_cb->lrng_hash_digestsize(pcpu_hash); + digestsize_irqs = lrng_entropy_to_data(*digestsize << 3); + + /* Obtain entropy statement like for the entropy pool */ + found_irqs = atomic_xchg_relaxed( + per_cpu_ptr(&lrng_pcpu_array_irqs, cpu), 0); + /* Cap to maximum amount of data we can hold in hash */ + found_irqs = min_t(u32, found_irqs, digestsize_irqs); + + /* Cap to maximum amount of data we can hold in array */ + if (!lrng_pcpu_continuous_compression) + found_irqs = min_t(u32, found_irqs, LRNG_DATA_NUM_VALUES); + + /* Store all not-yet compressed data in data array into hash, ... */ + if (pcpu_crypto_cb->lrng_hash_update(pcpu_shash, + (u8 *)per_cpu_ptr(lrng_pcpu_array, cpu), + LRNG_DATA_ARRAY_SIZE * sizeof(u32)) ?: + /* ... get the per-CPU pool digest, ... */ + pcpu_crypto_cb->lrng_hash_final(pcpu_shash, digest) ?: + /* ... re-initialize the hash, ... */ + pcpu_crypto_cb->lrng_hash_init(pcpu_shash, pcpu_hash) ?: + /* ... feed the old hash into the new state. */ + pcpu_crypto_cb->lrng_hash_update(pcpu_shash, digest, *digestsize)) + found_irqs = 0; + + spin_unlock_irqrestore(lock, flags); + return found_irqs; +} + +/* + * Hash all per-CPU pools and return the digest to be used as seed data for + * seeding a DRNG. The caller must guarantee backtracking resistance. + * The function will only copy as much data as entropy is available into the + * caller-provided output buffer. + * + * This function handles the translation from the number of received interrupts + * into an entropy statement. The conversion depends on LRNG_IRQ_ENTROPY_BITS + * which defines how many interrupts must be received to obtain 256 bits of + * entropy. With this value, the function lrng_data_to_entropy converts a given + * data size (received interrupts, requested amount of data, etc.) into an + * entropy statement. lrng_entropy_to_data does the reverse. + * + * @outbuf: buffer to store data in with size requested_bits + * @requested_bits: Requested amount of entropy + * @fully_seeded: indicator whether LRNG is fully seeded + * @return: amount of entropy in outbuf in bits. 
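+ * A return value of zero indicates that no entropy could be collected or
+ * that a hash operation failed.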
+ */
+u32 lrng_pcpu_pool_hash(u8 *outbuf, u32 requested_bits, bool fully_seeded)
+{
+	SHASH_DESC_ON_STACK(shash, NULL);
+	const struct lrng_crypto_cb *crypto_cb;
+	struct lrng_drng **lrng_drng = lrng_drng_instances();
+	struct lrng_drng *drng = lrng_drng_init_instance();
+	u8 digest[LRNG_MAX_DIGESTSIZE];
+	unsigned long flags, flags2;
+	u32 found_irqs, collected_irqs = 0, collected_ent_bits, requested_irqs,
+	    returned_ent_bits;
+	int ret, cpu;
+	void *hash;
+
+	/* Lock guarding replacement of per-NUMA hash */
+	lrng_hash_lock(drng, &flags);
+
+	crypto_cb = drng->crypto_cb;
+	hash = drng->hash;
+
+	/* The hash state is filled with all per-CPU pool hashes. */
+	ret = crypto_cb->lrng_hash_init(shash, hash);
+	if (ret)
+		goto err;
+
+	requested_irqs = lrng_entropy_to_data(requested_bits +
+					      lrng_compress_osr());
+
+	/*
+	 * Harvest entropy from each per-CPU hash state - even though we may
+	 * have collected sufficient entropy, we will hash all per-CPU pools.
+	 */
+	for_each_online_cpu(cpu) {
+		struct lrng_drng *pcpu_drng = drng;
+		u32 digestsize, pcpu_unused_irqs = 0;
+		int node = cpu_to_node(cpu);
+
+		/* If pool is not online, then no entropy is present. */
+		if (!lrng_pcpu_pool_online(cpu))
+			continue;
+
+		if (lrng_drng && lrng_drng[node])
+			pcpu_drng = lrng_drng[node];
+
+		if (pcpu_drng == drng) {
+			found_irqs = lrng_pcpu_pool_hash_one(crypto_cb, hash,
+							     cpu, digest,
+							     &digestsize);
+		} else {
+			lrng_hash_lock(pcpu_drng, &flags2);
+			found_irqs =
+				lrng_pcpu_pool_hash_one(pcpu_drng->crypto_cb,
+							pcpu_drng->hash, cpu,
+							digest, &digestsize);
+			lrng_hash_unlock(pcpu_drng, flags2);
+		}
+
+		/* Inject the digest into the state of all per-CPU pools */
+		ret = crypto_cb->lrng_hash_update(shash, digest, digestsize);
+		if (ret)
+			goto err;
+
+		collected_irqs += found_irqs;
+		if (collected_irqs > requested_irqs) {
+			pcpu_unused_irqs = collected_irqs - requested_irqs;
+			atomic_add_return_relaxed(pcpu_unused_irqs,
+				per_cpu_ptr(&lrng_pcpu_array_irqs, cpu));
+			collected_irqs = requested_irqs;
+		}
+		pr_debug("%u interrupts used from entropy pool of CPU %d, %u interrupts remain unused\n",
+			 found_irqs - pcpu_unused_irqs, cpu, pcpu_unused_irqs);
+	}
+
+	ret = crypto_cb->lrng_hash_final(shash, digest);
+	if (ret)
+		goto err;
+
+	collected_ent_bits = lrng_data_to_entropy(collected_irqs);
+	/* Cap to maximum entropy that can ever be generated with given hash */
+	collected_ent_bits = min_t(u32, collected_ent_bits,
+				   crypto_cb->lrng_hash_digestsize(hash) << 3);
+	/* Apply oversampling: discount requested oversampling rate */
+	returned_ent_bits = lrng_reduce_by_osr(collected_ent_bits);
+
+	pr_debug("obtained %u bits by collecting %u bits of entropy from entropy pool noise source\n",
+		 returned_ent_bits, collected_ent_bits);
+
+	/*
+	 * Truncate to available entropy as implicitly allowed by SP800-90B
+	 * section 3.1.5.1.1 table 1 which awards truncated hashes full
+	 * entropy.
+	 *
+	 * During boot time, we read requested_bits data with
+	 * returned_ent_bits entropy. In case our conservative entropy
+	 * estimate underestimates the available entropy, we can transport as
+	 * much available entropy as possible.
+	 */
+	memcpy(outbuf, digest, fully_seeded ?
returned_ent_bits >> 3 : + requested_bits >> 3); + +out: + crypto_cb->lrng_hash_desc_zero(shash); + lrng_hash_unlock(drng, flags); + memzero_explicit(digest, sizeof(digest)); + return returned_ent_bits; + +err: + returned_ent_bits = 0; + goto out; +} + +/* Compress the lrng_pcpu_array array into lrng_pcpu_pool */ +static void lrng_pcpu_array_compress(void) +{ + struct shash_desc *shash = + (struct shash_desc *)this_cpu_ptr(lrng_pcpu_pool); + struct lrng_drng **lrng_drng = lrng_drng_instances(); + struct lrng_drng *drng = lrng_drng_init_instance(); + const struct lrng_crypto_cb *crypto_cb; + spinlock_t *lock = this_cpu_ptr(&lrng_pcpu_lock); + unsigned long flags, flags2; + int node = numa_node_id(); + void *hash; + bool init = false; + + /* Get NUMA-node local hash instance */ + if (lrng_drng && lrng_drng[node]) + drng = lrng_drng[node]; + + lrng_hash_lock(drng, &flags); + crypto_cb = drng->crypto_cb; + hash = drng->hash; + + if (unlikely(!this_cpu_read(lrng_pcpu_lock_init))) { + init = true; + spin_lock_init(lock); + this_cpu_write(lrng_pcpu_lock_init, true); + pr_debug("Initializing per-CPU entropy pool for CPU %d on NUMA node %d with hash %s\n", + raw_smp_processor_id(), node, + crypto_cb->lrng_hash_name()); + } + + spin_lock_irqsave(lock, flags2); + + if (unlikely(init) && crypto_cb->lrng_hash_init(shash, hash)) { + this_cpu_write(lrng_pcpu_lock_init, false); + pr_warn("Initialization of hash failed\n"); + } else if (lrng_pcpu_continuous_compression) { + /* Add entire per-CPU data array content into entropy pool. */ + if (crypto_cb->lrng_hash_update(shash, + (u8 *)this_cpu_ptr(lrng_pcpu_array), + LRNG_DATA_ARRAY_SIZE * sizeof(u32))) + pr_warn_ratelimited("Hashing of entropy data failed\n"); + } + + spin_unlock_irqrestore(lock, flags2); + lrng_hash_unlock(drng, flags); +} + +/* Compress data array into hash */ +static void lrng_pcpu_array_to_hash(u32 ptr) +{ + u32 *array = this_cpu_ptr(lrng_pcpu_array); + + /* + * During boot time the hash operation is triggered more often than + * during regular operation. + */ + if (unlikely(!lrng_state_fully_seeded())) { + if ((ptr & 31) && (ptr < LRNG_DATA_WORD_MASK)) + return; + } else if (ptr < LRNG_DATA_WORD_MASK) { + return; + } + + if (lrng_raw_array_entropy_store(*array)) { + u32 i; + + /* + * If we fed even a part of the array to external analysis, we + * mark that the entire array and the per-CPU pool to have no + * entropy. This is due to the non-IID property of the data as + * we do not fully know whether the existing dependencies + * diminish the entropy beyond to what we expect it has. + */ + atomic_set(this_cpu_ptr(&lrng_pcpu_array_irqs), 0); + + for (i = 1; i < LRNG_DATA_ARRAY_SIZE; i++) + lrng_raw_array_entropy_store(*(array + i)); + } else { + lrng_pcpu_array_compress(); + /* Ping pool handler about received entropy */ + lrng_pool_add_entropy(); + } +} + +/* + * Concatenate full 32 bit word at the end of time array even when current + * ptr is not aligned to sizeof(data). + */ +static void _lrng_pcpu_array_add_u32(u32 data) +{ + /* Increment pointer by number of slots taken for input value */ + u32 pre_ptr, mask, ptr = this_cpu_add_return(lrng_pcpu_array_ptr, + LRNG_DATA_SLOTS_PER_UINT); + unsigned int pre_array; + + /* + * This function injects a unit into the array - guarantee that + * array unit size is equal to data type of input data. 
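+ * E.g., with LRNG_DATA_SLOTSIZE_BITS == 8 and 32-bit array members, one
+ * u32 spans LRNG_DATA_SLOTS_PER_UINT == 4 slots.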
+ */ + BUILD_BUG_ON(LRNG_DATA_ARRAY_MEMBER_BITS != (sizeof(data) << 3)); + + /* + * The following logic requires at least two units holding + * the data as otherwise the pointer would immediately wrap when + * injection an u32 word. + */ + BUILD_BUG_ON(LRNG_DATA_NUM_VALUES <= LRNG_DATA_SLOTS_PER_UINT); + + lrng_pcpu_split_u32(&ptr, &pre_ptr, &mask); + + /* MSB of data go into previous unit */ + pre_array = lrng_data_idx2array(pre_ptr); + /* zeroization of slot to ensure the following OR adds the data */ + this_cpu_and(lrng_pcpu_array[pre_array], ~(0xffffffff & ~mask)); + this_cpu_or(lrng_pcpu_array[pre_array], data & ~mask); + + /* Invoke compression as we just filled data array completely */ + if (unlikely(pre_ptr > ptr)) + lrng_pcpu_array_to_hash(LRNG_DATA_WORD_MASK); + + /* LSB of data go into current unit */ + this_cpu_write(lrng_pcpu_array[lrng_data_idx2array(ptr)], + data & mask); + + if (likely(pre_ptr <= ptr)) + lrng_pcpu_array_to_hash(ptr); +} + +/* Concatenate a 32-bit word at the end of the per-CPU array */ +void lrng_pcpu_array_add_u32(u32 data) +{ + /* + * Disregard entropy-less data without continuous compression to + * avoid it overwriting data with entropy when array ptr wraps. + */ + if (lrng_pcpu_continuous_compression) + _lrng_pcpu_array_add_u32(data); +} + +/* Concatenate data of max LRNG_DATA_SLOTSIZE_MASK at the end of time array */ +static void lrng_pcpu_array_add_slot(u32 data) +{ + /* Get slot */ + u32 ptr = this_cpu_inc_return(lrng_pcpu_array_ptr) & + LRNG_DATA_WORD_MASK; + unsigned int array = lrng_data_idx2array(ptr); + unsigned int slot = lrng_data_idx2slot(ptr); + + BUILD_BUG_ON(LRNG_DATA_ARRAY_MEMBER_BITS % LRNG_DATA_SLOTSIZE_BITS); + /* Ensure consistency of values */ + BUILD_BUG_ON(LRNG_DATA_ARRAY_MEMBER_BITS != + sizeof(lrng_pcpu_array[0]) << 3); + + /* zeroization of slot to ensure the following OR adds the data */ + this_cpu_and(lrng_pcpu_array[array], + ~(lrng_data_slot_val(0xffffffff & LRNG_DATA_SLOTSIZE_MASK, + slot))); + /* Store data into slot */ + this_cpu_or(lrng_pcpu_array[array], lrng_data_slot_val(data, slot)); + + lrng_pcpu_array_to_hash(ptr); +} + +static void +lrng_time_process_common(u32 time, void(*add_time)(u32 data)) +{ + enum lrng_health_res health_test; + + if (lrng_raw_hires_entropy_store(time)) + return; + + health_test = lrng_health_test(time); + if (health_test > lrng_health_fail_use) + return; + + if (health_test == lrng_health_pass) + atomic_inc_return(this_cpu_ptr(&lrng_pcpu_array_irqs)); + + add_time(time); +} + +/* + * Batching up of entropy in per-CPU array before injecting into entropy pool. 
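+ * Depending on whether a GCD is already known, either the full 32-bit
+ * time stamp or only its GCD-adjusted low bits are recorded.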
+ */ +static void lrng_time_process(void) +{ + u32 now_time = random_get_entropy(); + + if (unlikely(!lrng_gcd_tested())) { + /* When GCD is unknown, we process the full time stamp */ + lrng_time_process_common(now_time, _lrng_pcpu_array_add_u32); + lrng_gcd_add_value(now_time); + } else { + /* GCD is known and applied */ + lrng_time_process_common((now_time / lrng_gcd_timer) & + LRNG_DATA_SLOTSIZE_MASK, + lrng_pcpu_array_add_slot); + } + + lrng_perf_time(now_time); +} + +/* Hot code path - Callback for interrupt handler */ +void add_interrupt_randomness(int irq) +{ + if (lrng_pool_highres_timer()) { + lrng_time_process(); + } else { + struct pt_regs *regs = get_irq_regs(); + static atomic_t reg_idx = ATOMIC_INIT(0); + u64 ip; + u32 tmp; + + if (regs) { + u32 *ptr = (u32 *)regs; + int reg_ptr = atomic_add_return_relaxed(1, ®_idx); + size_t n = (sizeof(struct pt_regs) / sizeof(u32)); + + ip = instruction_pointer(regs); + tmp = *(ptr + (reg_ptr % n)); + tmp = lrng_raw_regs_entropy_store(tmp) ? 0 : tmp; + _lrng_pcpu_array_add_u32(tmp); + } else { + ip = _RET_IP_; + } + + lrng_time_process(); + + /* + * The XOR operation combining the different values is not + * considered to destroy entropy since the entirety of all + * processed values delivers the entropy (and not each + * value separately of the other values). + */ + tmp = lrng_raw_jiffies_entropy_store(jiffies) ? 0 : jiffies; + tmp ^= lrng_raw_irq_entropy_store(irq) ? 0 : irq; + tmp ^= lrng_raw_retip_entropy_store(ip) ? 0 : ip; + tmp ^= ip >> 32; + _lrng_pcpu_array_add_u32(tmp); + } +} +EXPORT_SYMBOL(add_interrupt_randomness); + +void lrng_irq_es_state(unsigned char *buf, size_t buflen) +{ + const struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + + /* Assume the lrng_drng_init lock is taken by caller */ + snprintf(buf, buflen, + "IRQ ES properties:\n" + " Hash for operating entropy pool: %s\n" + " per-CPU interrupt collection size: %u\n" + " Standards compliance: %s\n" + " High-resolution timer: %s\n" + " Continuous compression: %s\n", + lrng_drng_init->crypto_cb->lrng_hash_name(), + LRNG_DATA_NUM_VALUES, + lrng_sp80090b_compliant() ? "SP800-90B " : "", + lrng_pool_highres_timer() ? "true" : "false", + lrng_pcpu_continuous_compression ? "true" : "false"); +} diff --git a/drivers/char/lrng/lrng_es_irq.h b/drivers/char/lrng/lrng_es_irq.h new file mode 100644 index 0000000000000..00b16b1aa45fe --- /dev/null +++ b/drivers/char/lrng/lrng_es_irq.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * LRNG Slow Noise Source: Time stamp array handling + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +/* + * To limit the impact on the interrupt handling, the LRNG concatenates + * entropic LSB parts of the time stamps in a per-CPU array and only + * injects them into the entropy pool when the array is full. + */ + +/* Store multiple integers in one u32 */ +#define LRNG_DATA_SLOTSIZE_BITS (8) +#define LRNG_DATA_SLOTSIZE_MASK ((1 << LRNG_DATA_SLOTSIZE_BITS) - 1) +#define LRNG_DATA_ARRAY_MEMBER_BITS (4 << 3) /* ((sizeof(u32)) << 3) */ +#define LRNG_DATA_SLOTS_PER_UINT (LRNG_DATA_ARRAY_MEMBER_BITS / \ + LRNG_DATA_SLOTSIZE_BITS) + +/* + * Number of time values to store in the array - in small environments + * only one atomic_t variable per CPU is used. 
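+ * The collection size must be a power of two so that LRNG_DATA_WORD_MASK
+ * implements the ring-buffer wrap-around; e.g., a collection size of 1024
+ * yields a mask of 0x3ff.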
+ */ +#define LRNG_DATA_NUM_VALUES (CONFIG_LRNG_COLLECTION_SIZE) +/* Mask of LSB of time stamp to store */ +#define LRNG_DATA_WORD_MASK (LRNG_DATA_NUM_VALUES - 1) + +#define LRNG_DATA_SLOTS_MASK (LRNG_DATA_SLOTS_PER_UINT - 1) +#define LRNG_DATA_ARRAY_SIZE (LRNG_DATA_NUM_VALUES / \ + LRNG_DATA_SLOTS_PER_UINT) + +/* Starting bit index of slot */ +static inline unsigned int lrng_data_slot2bitindex(unsigned int slot) +{ + return (LRNG_DATA_SLOTSIZE_BITS * slot); +} + +/* Convert index into the array index */ +static inline unsigned int lrng_data_idx2array(unsigned int idx) +{ + return idx / LRNG_DATA_SLOTS_PER_UINT; +} + +/* Convert index into the slot of a given array index */ +static inline unsigned int lrng_data_idx2slot(unsigned int idx) +{ + return idx & LRNG_DATA_SLOTS_MASK; +} + +/* Convert value into slot value */ +static inline unsigned int lrng_data_slot_val(unsigned int val, + unsigned int slot) +{ + return val << lrng_data_slot2bitindex(slot); +} + +/* + * Return the pointers for the previous and current units to inject a u32 into. + * Also return the mask which the u32 word is to be processed. + */ +static inline void lrng_pcpu_split_u32(u32 *ptr, u32 *pre_ptr, u32 *mask) +{ + /* ptr to previous unit */ + *pre_ptr = (*ptr - LRNG_DATA_SLOTS_PER_UINT) & LRNG_DATA_WORD_MASK; + *ptr &= LRNG_DATA_WORD_MASK; + + /* mask to split data into the two parts for the two units */ + *mask = ((1 << (*pre_ptr & (LRNG_DATA_SLOTS_PER_UINT - 1)) * + LRNG_DATA_SLOTSIZE_BITS)) - 1; +} diff --git a/drivers/char/lrng/lrng_es_jent.c b/drivers/char/lrng/lrng_es_jent.c new file mode 100644 index 0000000000000..79b3dc8e8f562 --- /dev/null +++ b/drivers/char/lrng/lrng_es_jent.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Fast Entropy Source: Jitter RNG + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#include "lrng_internal.h" + +/* + * Estimated entropy of data is a 16th of LRNG_DRNG_SECURITY_STRENGTH_BITS. + * Albeit a full entropy assessment is provided for the noise source indicating + * that it provides high entropy rates and considering that it deactivates + * when it detects insufficient hardware, the chosen under estimation of + * entropy is considered to be acceptable to all reviewers. + */ +static u32 jitterrng = CONFIG_LRNG_JENT_ENTROPY_RATE; +#ifdef CONFIG_LRNG_RUNTIME_ES_CONFIG +module_param(jitterrng, uint, 0644); +MODULE_PARM_DESC(jitterrng, "Entropy in bits of 256 data bits from Jitter RNG noise source"); +#endif + +static bool lrng_jent_initialized = false; +static struct rand_data *lrng_jent_state; + +static int __init lrng_jent_initialize(void) +{ + /* Initialize the Jitter RNG after the clocksources are initialized. 
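+ * This ordering is the reason for using device_initcall() below rather
+ * than an earlier initcall level.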
*/ + if (jent_entropy_init() || + (lrng_jent_state = jent_entropy_collector_alloc(1, 0)) == NULL) { + jitterrng = 0; + pr_info("Jitter RNG unusable on current system\n"); + return 0; + } + lrng_jent_initialized = true; + lrng_pool_add_entropy(); + pr_debug("Jitter RNG working on current system\n"); + + return 0; +} +device_initcall(lrng_jent_initialize); + +/* + * lrng_get_jent() - Get Jitter RNG entropy + * + * @outbuf: buffer to store entropy + * @outbuflen: length of buffer + * + * Return: + * * > 0 on success where value provides the added entropy in bits + * * 0 if no fast source was available + */ +u32 lrng_get_jent(u8 *outbuf, u32 requested_bits) +{ + int ret; + u32 ent_bits = lrng_jent_entropylevel(requested_bits); + unsigned long flags; + static DEFINE_SPINLOCK(lrng_jent_lock); + + spin_lock_irqsave(&lrng_jent_lock, flags); + + if (!lrng_jent_initialized) { + spin_unlock_irqrestore(&lrng_jent_lock, flags); + return 0; + } + + ret = jent_read_entropy(lrng_jent_state, outbuf, requested_bits >> 3); + spin_unlock_irqrestore(&lrng_jent_lock, flags); + + if (ret) { + pr_debug("Jitter RNG failed with %d\n", ret); + return 0; + } + + pr_debug("obtained %u bits of entropy from Jitter RNG noise source\n", + ent_bits); + + return ent_bits; +} + +u32 lrng_jent_entropylevel(u32 requested_bits) +{ + return lrng_fast_noise_entropylevel((lrng_jent_initialized) ? + jitterrng : 0, requested_bits); +} + +void lrng_jent_es_state(unsigned char *buf, size_t buflen) +{ + snprintf(buf, buflen, + "JitterRNG ES properties:\n" + " Enabled: %s\n", lrng_jent_initialized ? "true" : "false"); +} diff --git a/drivers/char/lrng/lrng_es_mgr.c b/drivers/char/lrng/lrng_es_mgr.c new file mode 100644 index 0000000000000..efeb62ce0ce9e --- /dev/null +++ b/drivers/char/lrng/lrng_es_mgr.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Entropy sources management + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include "lrng_internal.h" + +struct lrng_state { + bool can_invalidate; /* Can invalidate batched entropy? */ + bool perform_seedwork; /* Can seed work be performed? */ + bool lrng_operational; /* Is DRNG operational? */ + bool lrng_fully_seeded; /* Is DRNG fully seeded? */ + bool lrng_min_seeded; /* Is DRNG minimally seeded? */ + bool all_online_numa_node_seeded;/* All NUMA DRNGs seeded? */ + + /* + * To ensure that external entropy providers cannot dominate the + * internal noise sources but yet cannot be dominated by internal + * noise sources, the following booleans are intended to allow + * external to provide seed once when a DRNG reseed occurs. This + * triggering of external noise source is performed even when the + * entropy pool has sufficient entropy. 
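+ * Both flags are re-enabled at the end of each seeding operation by
+ * lrng_state_exseed_allow_all().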
+ */ + bool lrng_seed_hw; /* Allow HW to provide seed */ + bool lrng_seed_user; /* Allow user space to provide seed */ + + atomic_t boot_entropy_thresh; /* Reseed threshold */ + atomic_t reseed_in_progress; /* Flag for on executing reseed */ + struct work_struct lrng_seed_work; /* (re)seed work queue */ +}; + +static struct lrng_state lrng_state = { + false, false, false, false, false, false, true, true, + .boot_entropy_thresh = ATOMIC_INIT(LRNG_INIT_ENTROPY_BITS), + .reseed_in_progress = ATOMIC_INIT(0), +}; + +/********************************** Helper ***********************************/ + +/* External entropy provider is allowed to provide seed data */ +bool lrng_state_exseed_allow(enum lrng_external_noise_source source) +{ + if (source == lrng_noise_source_hw) + return lrng_state.lrng_seed_hw; + return lrng_state.lrng_seed_user; +} + +/* Enable / disable external entropy provider to furnish seed */ +void lrng_state_exseed_set(enum lrng_external_noise_source source, bool type) +{ + if (source == lrng_noise_source_hw) + lrng_state.lrng_seed_hw = type; + else + lrng_state.lrng_seed_user = type; +} + +static void lrng_state_exseed_allow_all(void) +{ + lrng_state_exseed_set(lrng_noise_source_hw, true); + lrng_state_exseed_set(lrng_noise_source_user, true); +} + +/* + * Reading of the LRNG pool is only allowed by one caller. The reading is + * only performed to (re)seed DRNGs. Thus, if this "lock" is already taken, + * the reseeding operation is in progress. The caller is not intended to wait + * but continue with its other operation. + */ +int lrng_pool_trylock(void) +{ + return atomic_cmpxchg(&lrng_state.reseed_in_progress, 0, 1); +} + +void lrng_pool_unlock(void) +{ + atomic_set(&lrng_state.reseed_in_progress, 0); +} + +/* Set new entropy threshold for reseeding during boot */ +void lrng_set_entropy_thresh(u32 new_entropy_bits) +{ + atomic_set(&lrng_state.boot_entropy_thresh, new_entropy_bits); +} + +/* + * Reset LRNG state - the entropy counters are reset, but the data that may + * or may not have entropy remains in the pools as this data will not hurt. 
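+ * Only the entropy accounting is zeroed; the hash states of the pools
+ * retain their contents.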
+ */ +void lrng_reset_state(void) +{ + lrng_pool_set_entropy(0); + lrng_pcpu_reset(); + lrng_state.lrng_operational = false; + lrng_state.lrng_fully_seeded = false; + lrng_state.lrng_min_seeded = false; + lrng_state.all_online_numa_node_seeded = false; + pr_debug("reset LRNG\n"); +} + +/* Set flag that all DRNGs are fully seeded */ +void lrng_pool_all_numa_nodes_seeded(bool set) +{ + lrng_state.all_online_numa_node_seeded = set; +} + +/* Return boolean whether LRNG reached minimally seed level */ +bool lrng_state_min_seeded(void) +{ + return lrng_state.lrng_min_seeded; +} + +/* Return boolean whether LRNG reached fully seed level */ +bool lrng_state_fully_seeded(void) +{ + return lrng_state.lrng_fully_seeded; +} + +/* Return boolean whether LRNG is considered fully operational */ +bool lrng_state_operational(void) +{ + return lrng_state.lrng_operational; +} + +/* Policy to check whether entropy buffer contains full seeded entropy */ +bool lrng_fully_seeded(bool fully_seeded, struct entropy_buf *eb) +{ + return ((eb->a_bits + eb->b_bits + eb->c_bits + eb->d_bits) >= + lrng_get_seed_entropy_osr(fully_seeded)); +} + +/* Mark one DRNG as not fully seeded */ +void lrng_unset_fully_seeded(struct lrng_drng *drng) +{ + drng->fully_seeded = false; + lrng_pool_all_numa_nodes_seeded(false); + + /* + * The init DRNG instance must always be fully seeded as this instance + * is the fall-back if any of the per-NUMA node DRNG instances is + * insufficiently seeded. Thus, we mark the entire LRNG as + * non-operational if the initial DRNG becomes not fully seeded. + */ + if (drng == lrng_drng_init_instance() && lrng_state_operational()) { + pr_debug("LRNG set to non-operational\n"); + lrng_state.lrng_operational = false; + lrng_state.lrng_fully_seeded = false; + + /* If sufficient entropy is available, reseed now. */ + lrng_pool_add_entropy(); + } +} + +/* Policy to enable LRNG operational mode */ +static void lrng_set_operational(u32 external_es) +{ + /* LRNG is operational if the initial DRNG is fully seeded ... */ + if (lrng_state.lrng_fully_seeded && + /* ... and either internal ES SP800-90B startup is complete ... */ + (lrng_sp80090b_startup_complete() || + /* ... or the external ES provided sufficient entropy. */ + (lrng_get_seed_entropy_osr(lrng_state_fully_seeded()) <= + external_es))) { + lrng_state.lrng_operational = true; + lrng_process_ready_list(); + lrng_init_wakeup(); + pr_info("LRNG fully operational\n"); + } +} + +/* Available entropy in the entire LRNG considering all entropy sources */ +u32 lrng_avail_entropy(void) +{ + u32 ent_thresh = lrng_security_strength(); + + /* + * Apply oversampling during initialization according to SP800-90C as + * we request a larger buffer from the ES. + */ + if (lrng_sp80090c_compliant() && + !lrng_state.all_online_numa_node_seeded) + ent_thresh += CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS; + + return lrng_pcpu_avail_entropy() + lrng_avail_aux_entropy() + + lrng_archrandom_entropylevel(ent_thresh) + + lrng_jent_entropylevel(ent_thresh); +} + +/* + * lrng_init_ops() - Set seed stages of LRNG + * + * Set the slow noise source reseed trigger threshold. The initial threshold + * is set to the minimum data size that can be read from the pool: a word. Upon + * reaching this value, the next seed threshold of 128 bits is set followed + * by 256 bits. 
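+ * This progression corresponds to the lrng_set_entropy_thresh() calls in
+ * the function below (e.g. 32 -> 128 -> 256 bits with the default
+ * configuration).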
+ * + * @eb: buffer containing the size of entropy currently injected into DRNG + */ +void lrng_init_ops(struct entropy_buf *eb) +{ + struct lrng_state *state = &lrng_state; + u32 requested_bits, seed_bits, external_es; + + if (state->lrng_operational) + return; + + requested_bits = lrng_get_seed_entropy_osr( + state->all_online_numa_node_seeded); + + /* + * Entropy provided by external entropy sources - if they provide + * the requested amount of entropy, unblock the interface. + */ + external_es = eb->a_bits + eb->c_bits + eb->d_bits; + seed_bits = external_es + eb->b_bits; + + /* DRNG is seeded with full security strength */ + if (state->lrng_fully_seeded) { + lrng_set_operational(external_es); + lrng_set_entropy_thresh(requested_bits); + } else if (lrng_fully_seeded(state->all_online_numa_node_seeded, eb)) { + if (state->can_invalidate) + invalidate_batched_entropy(); + + state->lrng_fully_seeded = true; + lrng_set_operational(external_es); + state->lrng_min_seeded = true; + pr_info("LRNG fully seeded with %u bits of entropy\n", + seed_bits); + lrng_set_entropy_thresh(requested_bits); + } else if (!state->lrng_min_seeded) { + + /* DRNG is seeded with at least 128 bits of entropy */ + if (seed_bits >= LRNG_MIN_SEED_ENTROPY_BITS) { + if (state->can_invalidate) + invalidate_batched_entropy(); + + state->lrng_min_seeded = true; + pr_info("LRNG minimally seeded with %u bits of entropy\n", + seed_bits); + lrng_set_entropy_thresh(requested_bits); + lrng_init_wakeup(); + + /* DRNG is seeded with at least LRNG_INIT_ENTROPY_BITS bits */ + } else if (seed_bits >= LRNG_INIT_ENTROPY_BITS) { + pr_info("LRNG initial entropy level %u bits of entropy\n", + seed_bits); + lrng_set_entropy_thresh(LRNG_MIN_SEED_ENTROPY_BITS); + } + } +} + +int __init rand_initialize(void) +{ + struct seed { + ktime_t time; + unsigned long data[(LRNG_MAX_DIGESTSIZE / + sizeof(unsigned long))]; + struct new_utsname utsname; + } seed __aligned(LRNG_KCAPI_ALIGN); + unsigned int i; + + BUILD_BUG_ON(LRNG_MAX_DIGESTSIZE % sizeof(unsigned long)); + + seed.time = ktime_get_real(); + + for (i = 0; i < ARRAY_SIZE(seed.data); i++) { + if (!arch_get_random_seed_long_early(&(seed.data[i])) && + !arch_get_random_long_early(&seed.data[i])) + seed.data[i] = random_get_entropy(); + } + memcpy(&seed.utsname, utsname(), sizeof(*(utsname()))); + + lrng_pool_insert_aux((u8 *)&seed, sizeof(seed), 0); + memzero_explicit(&seed, sizeof(seed)); + + /* Initialize the seed work queue */ + INIT_WORK(&lrng_state.lrng_seed_work, lrng_drng_seed_work); + lrng_state.perform_seedwork = true; + + lrng_drngs_init_cc20(true); + invalidate_batched_entropy(); + + lrng_state.can_invalidate = true; + + return 0; +} + +/* Interface requesting a reseed of the DRNG */ +void lrng_pool_add_entropy(void) +{ + /* + * Once all DRNGs are fully seeded, the interrupt noise + * sources will not trigger any reseeding any more. + */ + if (likely(lrng_state.all_online_numa_node_seeded)) + return; + + /* Only try to reseed if the DRNG is alive. */ + if (!lrng_get_available()) + return; + + /* Only trigger the DRNG reseed if we have collected entropy. */ + if (lrng_avail_entropy() < + atomic_read_u32(&lrng_state.boot_entropy_thresh)) + return; + + /* Ensure that the seeding only occurs once at any given time. */ + if (lrng_pool_trylock()) + return; + + /* Seed the DRNG with any available noise. 
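+	 * Once rand_initialize() has set perform_seedwork, the seeding is
+	 * deferred to the work queue; before that point, i.e. during early
+	 * boot, it is executed synchronously in the current context.
+	 */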
+	if (lrng_state.perform_seedwork)
+		schedule_work(&lrng_state.lrng_seed_work);
+	else
+		lrng_drng_seed_work(NULL);
+}
+
+/* Fill the seed buffer with data from the noise sources */
+void lrng_fill_seed_buffer(struct entropy_buf *entropy_buf, u32 requested_bits)
+{
+	struct lrng_state *state = &lrng_state;
+	u32 req_ent = lrng_sp80090c_compliant() ?
+		  lrng_security_strength() : LRNG_MIN_SEED_ENTROPY_BITS;
+
+	/* Guarantee that the requested number of bits is a multiple of 8 */
+	BUILD_BUG_ON(LRNG_DRNG_SECURITY_STRENGTH_BITS % 8);
+
+	/* always reseed the DRNG with the current time stamp */
+	entropy_buf->now = random_get_entropy();
+
+	/*
+	 * Require at least 128 bits of entropy for any reseed. If the LRNG is
+	 * operated SP800-90C compliant we want to comply with SP800-90A section
+	 * 9.2 mandating that the DRNG is reseeded with the security strength.
+	 */
+	if (state->lrng_fully_seeded && (lrng_avail_entropy() < req_ent)) {
+		entropy_buf->a_bits = entropy_buf->b_bits = 0;
+		entropy_buf->c_bits = entropy_buf->d_bits = 0;
+		goto wakeup;
+	}
+
+	/* Concatenate the output of the entropy sources. */
+	entropy_buf->b_bits = lrng_pcpu_pool_hash(entropy_buf->b,
+						  requested_bits,
+						  state->lrng_fully_seeded);
+	entropy_buf->c_bits = lrng_get_arch(entropy_buf->c, requested_bits);
+	entropy_buf->d_bits = lrng_get_jent(entropy_buf->d, requested_bits);
+	lrng_get_backtrack_aux(entropy_buf, requested_bits);
+
+	/* allow external entropy provider to provide seed */
+	lrng_state_exseed_allow_all();
+
+wakeup:
+	/*
+	 * Shall we wake up user space writers? This check ensures that the
+	 * user space provider does not dominate the internal noise sources:
+	 * if the first call of this function finds sufficient entropy in the
+	 * entropy pool, it will not trigger the wakeup. The wakeup is thus
+	 * only triggered once a subsequent read, e.g. from /dev/urandom, has
+	 * drained the entropy pool.
+	 */
+	lrng_writer_wakeup();
+}
diff --git a/drivers/char/lrng/lrng_health.c b/drivers/char/lrng/lrng_health.c
new file mode 100644
index 0000000000000..61f266537f16a
--- /dev/null
+++ b/drivers/char/lrng/lrng_health.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Linux Random Number Generator (LRNG) Health Testing
+ *
+ * Copyright (C) 2019 - 2021, Stephan Mueller
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include
+#include
+
+#include "lrng_internal.h"
+
+/* Stuck Test */
+struct lrng_stuck_test {
+	u32 last_time;		/* Stuck test: time of previous IRQ */
+	u32 last_delta;		/* Stuck test: delta of previous IRQ */
+	u32 last_delta2;	/* Stuck test: second time derivative of prev IRQ */
+};
+
+/* Repetition Count Test */
+struct lrng_rct {
+	atomic_t rct_count;	/* Number of stuck values */
+};
+
+/* Adaptive Proportion Test */
+struct lrng_apt {
+	/* Data window size */
+#define LRNG_APT_WINDOW_SIZE	512
+	/* LSB of time stamp to process */
+#define LRNG_APT_LSB		16
+#define LRNG_APT_WORD_MASK	(LRNG_APT_LSB - 1)
+	atomic_t apt_count;		/* APT counter */
+	atomic_t apt_base;		/* APT base reference */
+
+	atomic_t apt_trigger;
+	bool apt_base_set;	/* Is APT base set?
*/ +}; + +/* The health test code must operate lock-less */ +struct lrng_health { + struct lrng_rct rct; + struct lrng_apt apt; + + bool health_test_enabled; + + /* SP800-90B startup health tests */ +#define LRNG_SP80090B_STARTUP_SAMPLES 1024 +#define LRNG_SP80090B_STARTUP_BLOCKS ((LRNG_SP80090B_STARTUP_SAMPLES + \ + LRNG_APT_WINDOW_SIZE - 1) / \ + LRNG_APT_WINDOW_SIZE) + bool sp80090b_startup_done; + atomic_t sp80090b_startup_blocks; +}; + +static struct lrng_health lrng_health = { + .rct.rct_count = ATOMIC_INIT(0), + + .apt.apt_count = ATOMIC_INIT(0), + .apt.apt_base = ATOMIC_INIT(-1), + .apt.apt_trigger = ATOMIC_INIT(LRNG_APT_WINDOW_SIZE), + .apt.apt_base_set = false, + + .health_test_enabled = true, + + .sp80090b_startup_blocks = ATOMIC_INIT(LRNG_SP80090B_STARTUP_BLOCKS), + .sp80090b_startup_done = false, +}; + +static DEFINE_PER_CPU(struct lrng_stuck_test, lrng_stuck_test); + +static bool lrng_sp80090b_health_requested(void) +{ + /* Health tests are only requested in FIPS mode */ + return fips_enabled; +} + +static bool lrng_sp80090b_health_enabled(void) +{ + struct lrng_health *health = &lrng_health; + + return lrng_sp80090b_health_requested() && health->health_test_enabled; +} + +/*************************************************************************** + * SP800-90B Compliance + * + * If the Linux-RNG is booted into FIPS mode, the following interfaces + * provide an SP800-90B compliant noise source: + * + * * /dev/random + * * getrandom(2) + * * get_random_bytes when using it in conjunction with + * add_random_ready_callback + * + * All other interfaces, including /dev/urandom or get_random_bytes without + * the add_random_ready_callback cannot claim to use an SP800-90B compliant + * noise source. + ***************************************************************************/ + +/* + * Perform SP800-90B startup testing + */ +static void lrng_sp80090b_startup(struct lrng_health *health) +{ + if (!health->sp80090b_startup_done && + atomic_dec_and_test(&health->sp80090b_startup_blocks)) { + struct entropy_buf eb; + + health->sp80090b_startup_done = true; + pr_info("SP800-90B startup health tests completed\n"); + memset(&eb, 0, sizeof(eb)); + lrng_init_ops(&eb); + + /* + * Force a reseed of DRNGs to ensure they are seeded with + * entropy that passed the SP800-90B health tests. + * As the DRNG always will reseed before generating + * random numbers, it does not need a reseed trigger. + */ + lrng_drng_force_reseed(); + } +} + +/* + * Handle failure of SP800-90B startup testing + */ +static void lrng_sp80090b_startup_failure(struct lrng_health *health) +{ + /* Reset of LRNG and its entropy - NOTE: we are in atomic context */ + lrng_reset(); + + /* + * Reset the SP800-90B startup test. + * + * NOTE SP800-90B section 4.3 bullet 4 does not specify what + * exactly is to be done in case of failure! Thus, we do what + * makes sense, i.e. restarting the health test and thus gating + * the output function of /dev/random and getrandom(2). 
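+	 *
+	 * With LRNG_SP80090B_STARTUP_SAMPLES = 1024 and an APT window of
+	 * 512 time stamps, this restart means that two further APT windows
+	 * must pass before the startup test is considered complete again.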
+	 */
+	atomic_set(&health->sp80090b_startup_blocks,
+		   LRNG_SP80090B_STARTUP_BLOCKS);
+}
+
+/*
+ * Handle failure of SP800-90B runtime testing
+ */
+static void lrng_sp80090b_runtime_failure(struct lrng_health *health)
+{
+	lrng_sp80090b_startup_failure(health);
+	health->sp80090b_startup_done = false;
+}
+
+static void lrng_sp80090b_failure(struct lrng_health *health)
+{
+	if (health->sp80090b_startup_done) {
+		pr_err("SP800-90B runtime health test failure - invalidating all existing entropy and initiating SP800-90B startup\n");
+		lrng_sp80090b_runtime_failure(health);
+	} else {
+		pr_err("SP800-90B startup test failure - resetting\n");
+		lrng_sp80090b_startup_failure(health);
+	}
+}
+
+/*
+ * Is the SP800-90B startup testing complete?
+ *
+ * This function is called by the LRNG to determine whether to unblock
+ * a certain user interface. Therefore, only the potentially blocking
+ * user interfaces are considered SP800-90B compliant.
+ */
+bool lrng_sp80090b_startup_complete(void)
+{
+	struct lrng_health *health = &lrng_health;
+
+	return (lrng_sp80090b_health_enabled()) ?
+	       health->sp80090b_startup_done : true;
+}
+
+bool lrng_sp80090b_compliant(void)
+{
+	struct lrng_health *health = &lrng_health;
+
+	return lrng_sp80090b_health_enabled() && health->sp80090b_startup_done;
+}
+
+/***************************************************************************
+ * Adaptive Proportion Test
+ *
+ * This test complies with SP800-90B section 4.4.2.
+ ***************************************************************************/
+
+/*
+ * Reset the APT counter
+ *
+ * @health [in] Reference to health state
+ */
+static void lrng_apt_reset(struct lrng_health *health,
+			   unsigned int time_masked)
+{
+	struct lrng_apt *apt = &health->apt;
+
+	pr_debug("APT value %d for base %d\n",
+		 atomic_read(&apt->apt_count), atomic_read(&apt->apt_base));
+
+	/* Reset APT */
+	atomic_set(&apt->apt_count, 0);
+	atomic_set(&apt->apt_base, time_masked);
+}
+
+static void lrng_apt_restart(struct lrng_health *health)
+{
+	struct lrng_apt *apt = &health->apt;
+
+	atomic_set(&apt->apt_trigger, LRNG_APT_WINDOW_SIZE);
+}
+
+/*
+ * Insert a new entropy event into APT
+ *
+ * This function is void because it does not decide about the fate of a time
+ * stamp. An APT failure can only happen at the same time as a stuck test
+ * failure. Thus, the stuck failure will already decide how the time stamp
+ * is handled.
+ *
+ * @health [in] Reference to health state
+ * @now_time [in] Time stamp to process
+ */
+static void lrng_apt_insert(struct lrng_health *health,
+			    unsigned int now_time)
+{
+	struct lrng_apt *apt = &health->apt;
+
+	if (!lrng_sp80090b_health_requested())
+		return;
+
+	now_time &= LRNG_APT_WORD_MASK;
+
+	/* Initialization of APT */
+	if (!apt->apt_base_set) {
+		atomic_set(&apt->apt_base, now_time);
+		apt->apt_base_set = true;
+		return;
+	}
+
+	if (now_time == (unsigned int)atomic_read(&apt->apt_base)) {
+		u32 apt_val = (u32)atomic_inc_return_relaxed(&apt->apt_count);
+
+		if (apt_val >= CONFIG_LRNG_APT_CUTOFF)
+			lrng_sp80090b_failure(health);
+	}
+
+	if (atomic_dec_and_test(&apt->apt_trigger)) {
+		lrng_apt_restart(health);
+		lrng_apt_reset(health, now_time);
+		lrng_sp80090b_startup(health);
+	}
+}
+
+/***************************************************************************
+ * Repetition Count Test
+ *
+ * The LRNG uses an enhanced version of the Repetition Count Test
+ * (RCT) specified in SP800-90B section 4.4.1.
 Instead of counting
+ * identical back-to-back values, the input to the RCT is the counting of
+ * the stuck values while filling the entropy pool.
+ *
+ * The RCT is applied with an alpha of 2^-30 compliant to FIPS 140-2 IG 9.8.
+ *
+ * During the counting operation, the LRNG always calculates the RCT
+ * cut-off value C. If that value exceeds the allowed cut-off value,
+ * the LRNG will invalidate all entropy in the entropy pool, which implies
+ * that no data can be extracted from the entropy pool unless new entropy
+ * is received.
+ ***************************************************************************/
+
+/*
+ * Hot code path - Insert data for Repetition Count Test
+ *
+ * @health: Reference to health information
+ * @stuck: Decision of stuck test
+ */
+static void lrng_rct(struct lrng_health *health, int stuck)
+{
+	struct lrng_rct *rct = &health->rct;
+
+	if (!lrng_sp80090b_health_requested())
+		return;
+
+	if (stuck) {
+		u32 rct_count = atomic_add_return_relaxed(1, &rct->rct_count);
+
+		pr_debug("RCT count: %u\n", rct_count);
+
+		/*
+		 * The cutoff value is based on the following consideration:
+		 * alpha = 2^-30 as recommended in FIPS 140-2 IG 9.8.
+		 * In addition, we imply an entropy value H of 1 bit as this
+		 * is the minimum entropy required to provide full entropy.
+		 * With these values, the formula of SP800-90B section 4.4.1
+		 * yields a cutoff of C = 1 + 30/1 = 31.
+		 *
+		 * Note, rct_count (which equals value B in the
+		 * pseudo code of SP800-90B section 4.4.1) starts with zero.
+		 * Hence we need to subtract one from the cutoff value as
+		 * calculated following SP800-90B.
+		 */
+		if (rct_count >= CONFIG_LRNG_RCT_CUTOFF) {
+			atomic_set(&rct->rct_count, 0);
+
+			/*
+			 * APT must start anew as we consider all previously
+			 * recorded data to contain no entropy.
+			 */
+			lrng_apt_restart(health);
+
+			lrng_sp80090b_failure(health);
+		}
+	} else {
+		atomic_set(&rct->rct_count, 0);
+	}
+}
+
+/***************************************************************************
+ * Stuck Test
+ *
+ * Checking the:
+ *  1st derivative of the event occurrence (time delta)
+ *  2nd derivative of the event occurrence (delta of time deltas)
+ *  3rd derivative of the event occurrence (delta of delta of time deltas)
+ *
+ * All values must always be non-zero. The stuck test is only valid if
+ * high-resolution time stamps are identified after initialization;
+ * otherwise it is disabled.
+ ***************************************************************************/
+
+static u32 lrng_delta(u32 prev, u32 next)
+{
+	/*
+	 * Note that this (unsigned) subtraction does yield the correct value
+	 * in the wraparound-case, i.e. when next < prev.
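+	 *
+	 * E.g. for prev = 0xfffffff0 and next = 0x10, the result is
+	 * (0x10 - 0xfffffff0) mod 2^32 = 0x20, i.e. the true distance
+	 * between the two time stamps.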
+ */ + return (next - prev); +} + +/* + * Hot code path + * + * @health: Reference to health information + * @now: Event time + * @return: 0 event occurrence not stuck (good time stamp) + * != 0 event occurrence stuck (reject time stamp) + */ +static int lrng_irq_stuck(struct lrng_stuck_test *stuck, u32 now_time) +{ + u32 delta = lrng_delta(stuck->last_time, now_time); + u32 delta2 = lrng_delta(stuck->last_delta, delta); + u32 delta3 = lrng_delta(stuck->last_delta2, delta2); + + stuck->last_time = now_time; + stuck->last_delta = delta; + stuck->last_delta2 = delta2; + + if (!delta || !delta2 || !delta3) + return 1; + + return 0; +} + +/*************************************************************************** + * Health test interfaces + ***************************************************************************/ + +/* + * Disable all health tests + */ +void lrng_health_disable(void) +{ + struct lrng_health *health = &lrng_health; + + health->health_test_enabled = false; + + if (lrng_sp80090b_health_requested()) + pr_warn("SP800-90B compliance requested but the Linux RNG is NOT SP800-90B compliant\n"); +} + +/* + * Hot code path - Perform health test on time stamp received from an event + * + * @now_time Time stamp + */ +enum lrng_health_res lrng_health_test(u32 now_time) +{ + struct lrng_health *health = &lrng_health; + struct lrng_stuck_test *stuck_test = this_cpu_ptr(&lrng_stuck_test); + int stuck; + + if (!health->health_test_enabled) + return lrng_health_pass; + + lrng_apt_insert(health, now_time); + + stuck = lrng_irq_stuck(stuck_test, now_time); + lrng_rct(health, stuck); + if (stuck) { + /* SP800-90B disallows using a failing health test time stamp */ + return lrng_sp80090b_health_requested() ? + lrng_health_fail_drop : lrng_health_fail_use; + } + + return lrng_health_pass; +} diff --git a/drivers/char/lrng/lrng_interfaces.c b/drivers/char/lrng/lrng_interfaces.c new file mode 100644 index 0000000000000..134e556ac7392 --- /dev/null +++ b/drivers/char/lrng/lrng_interfaces.c @@ -0,0 +1,654 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG User and kernel space interfaces + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +#include "lrng_internal.h" + +/* + * If the entropy count falls under this number of bits, then we + * should wake up processes which are selecting or polling on write + * access to /dev/random. + */ +u32 lrng_write_wakeup_bits = (LRNG_WRITE_WAKEUP_ENTROPY << 3); + +static LIST_HEAD(lrng_ready_list); +static DEFINE_SPINLOCK(lrng_ready_list_lock); + +static DECLARE_WAIT_QUEUE_HEAD(lrng_write_wait); +static DECLARE_WAIT_QUEUE_HEAD(lrng_init_wait); +static struct fasync_struct *fasync; + +/********************************** Helper ***********************************/ + +/* Is the DRNG seed level too low? 
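+ * The reference point is lrng_write_wakeup_bits, i.e.
+ * LRNG_WRITE_WAKEUP_ENTROPY bytes (the digest size of the built-in
+ * atomic hash) expressed in bits.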
*/ +static bool lrng_need_entropy(void) +{ + return (lrng_avail_aux_entropy() < lrng_write_wakeup_bits); +} + +void lrng_writer_wakeup(void) +{ + if (lrng_need_entropy() && wq_has_sleeper(&lrng_write_wait)) { + wake_up_interruptible(&lrng_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } +} + +void lrng_init_wakeup(void) +{ + wake_up_all(&lrng_init_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); +} + +/** + * lrng_process_ready_list() - Ping all kernel internal callers waiting until + * the DRNG is completely initialized to inform that the DRNG reached that + * seed level. + * + * When the SP800-90B testing is enabled, the ping only happens if the SP800-90B + * startup health tests are completed. This implies that kernel internal + * callers always have an SP800-90B compliant noise source when being + * pinged. + */ +void lrng_process_ready_list(void) +{ + unsigned long flags; + struct random_ready_callback *rdy, *tmp; + + if (!lrng_state_operational()) + return; + + spin_lock_irqsave(&lrng_ready_list_lock, flags); + list_for_each_entry_safe(rdy, tmp, &lrng_ready_list, list) { + struct module *owner = rdy->owner; + + list_del_init(&rdy->list); + rdy->func(rdy); + module_put(owner); + } + spin_unlock_irqrestore(&lrng_ready_list_lock, flags); +} + +void lrng_debug_report_seedlevel(const char *name) +{ +#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM + static void *previous = NULL; + void *caller = (void *) _RET_IP_; + + if (READ_ONCE(previous) == caller) + return; + + if (!lrng_state_min_seeded()) + pr_notice("%pS %s called without reaching minimally seeded level (available entropy %u)\n", + caller, name, lrng_avail_entropy()); + + WRITE_ONCE(previous, caller); +#endif +} + +/************************ LRNG kernel input interfaces ************************/ + +/* + * add_hwgenerator_randomness() - Interface for in-kernel drivers of true + * hardware RNGs. + * + * Those devices may produce endless random bits and will be throttled + * when our pool is full. + * + * @buffer: buffer holding the entropic data from HW noise sources to be used to + * insert into entropy pool. + * @count: length of buffer + * @entropy_bits: amount of entropy in buffer (value is in bits) + */ +void add_hwgenerator_randomness(const char *buffer, size_t count, + size_t entropy_bits) +{ + /* + * Suspend writing if we are fully loaded with entropy. + * We'll be woken up again once below lrng_write_wakeup_thresh, + * or when the calling thread is about to terminate. + */ + wait_event_interruptible(lrng_write_wait, + lrng_need_entropy() || + lrng_state_exseed_allow(lrng_noise_source_hw) || + kthread_should_stop()); + lrng_state_exseed_set(lrng_noise_source_hw, false); + lrng_pool_insert_aux(buffer, count, entropy_bits); +} +EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); + +/* + * add_bootloader_randomness() - Handle random seed passed by bootloader. + * + * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise + * it would be regarded as device data. + * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. + * + * @buf: buffer holding the entropic data from HW noise sources to be used to + * insert into entropy pool. + * @size: length of buffer + */ +void add_bootloader_randomness(const void *buf, unsigned int size) +{ + lrng_pool_insert_aux(buf, size, + IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER) ? 
+ size * 8 : 0); +} +EXPORT_SYMBOL_GPL(add_bootloader_randomness); + +/* + * Callback for HID layer -- use the HID event values to stir the entropy pool + */ +void add_input_randomness(unsigned int type, unsigned int code, + unsigned int value) +{ + static unsigned char last_value; + + /* ignore autorepeat and the like */ + if (value == last_value) + return; + + last_value = value; + + lrng_pcpu_array_add_u32((type << 4) ^ code ^ (code >> 4) ^ value); +} +EXPORT_SYMBOL_GPL(add_input_randomness); + +/* + * add_device_randomness() - Add device- or boot-specific data to the entropy + * pool to help initialize it. + * + * None of this adds any entropy; it is meant to avoid the problem of + * the entropy pool having similar initial state across largely + * identical devices. + * + * @buf: buffer holding the entropic data from HW noise sources to be used to + * insert into entropy pool. + * @size: length of buffer + */ +void add_device_randomness(const void *buf, unsigned int size) +{ + lrng_pool_insert_aux((u8 *)buf, size, 0); +} +EXPORT_SYMBOL(add_device_randomness); + +#ifdef CONFIG_BLOCK +void rand_initialize_disk(struct gendisk *disk) { } +void add_disk_randomness(struct gendisk *disk) { } +EXPORT_SYMBOL(add_disk_randomness); +#endif + +#ifndef CONFIG_LRNG_IRQ +void add_interrupt_randomness(int irq) { } +EXPORT_SYMBOL(add_interrupt_randomness); +#endif + +/* + * del_random_ready_callback() - Delete a previously registered readiness + * callback function. + * + * @rdy: callback definition that was registered initially + */ +void del_random_ready_callback(struct random_ready_callback *rdy) +{ + unsigned long flags; + struct module *owner = NULL; + + spin_lock_irqsave(&lrng_ready_list_lock, flags); + if (!list_empty(&rdy->list)) { + list_del_init(&rdy->list); + owner = rdy->owner; + } + spin_unlock_irqrestore(&lrng_ready_list_lock, flags); + + module_put(owner); +} +EXPORT_SYMBOL(del_random_ready_callback); + +/* + * add_random_ready_callback() - Add a callback function that will be invoked + * when the DRNG is fully initialized and seeded. + * + * @rdy: callback definition to be invoked when the LRNG is seeded + * + * Return: + * * 0 if callback is successfully added + * * -EALREADY if pool is already initialised (callback not called) + * * -ENOENT if module for callback is not alive + */ +int add_random_ready_callback(struct random_ready_callback *rdy) +{ + struct module *owner; + unsigned long flags; + int err = -EALREADY; + + if (likely(lrng_state_operational())) + return err; + + owner = rdy->owner; + if (!try_module_get(owner)) + return -ENOENT; + + spin_lock_irqsave(&lrng_ready_list_lock, flags); + if (lrng_state_operational()) + goto out; + + owner = NULL; + + list_add(&rdy->list, &lrng_ready_list); + err = 0; + +out: + spin_unlock_irqrestore(&lrng_ready_list_lock, flags); + + module_put(owner); + + return err; +} +EXPORT_SYMBOL(add_random_ready_callback); + +/*********************** LRNG kernel output interfaces ************************/ + +/* + * get_random_bytes() - Provider of cryptographic strong random numbers for + * kernel-internal usage. + * + * This function is appropriate for all in-kernel use cases. However, + * it will always use the ChaCha20 DRNG. 
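+ *
+ * A typical invocation, e.g. for generating a random nonce, is:
+ *
+ *	u64 nonce;
+ *
+ *	get_random_bytes(&nonce, sizeof(nonce));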
+ *
+ * @buf: buffer to store the random bytes
+ * @nbytes: size of the buffer
+ */
+void get_random_bytes(void *buf, int nbytes)
+{
+	lrng_drng_get_atomic((u8 *)buf, (u32)nbytes);
+	lrng_debug_report_seedlevel("get_random_bytes");
+}
+EXPORT_SYMBOL(get_random_bytes);
+
+/*
+ * get_random_bytes_full() - Provider of cryptographically strong random
+ * numbers for kernel-internal usage.
+ *
+ * This function is appropriate only for non-atomic use cases as this
+ * function may sleep. Moreover, it provides access to the full functionality
+ * of the LRNG including the switchable DRNG support, which may cover other
+ * DRNGs such as the SP800-90A DRBG.
+ *
+ * @buf: buffer to store the random bytes
+ * @nbytes: size of the buffer
+ */
+void get_random_bytes_full(void *buf, int nbytes)
+{
+	lrng_drng_get_sleep((u8 *)buf, (u32)nbytes);
+	lrng_debug_report_seedlevel("get_random_bytes_full");
+}
+EXPORT_SYMBOL(get_random_bytes_full);
+
+/*
+ * wait_for_random_bytes() - Wait for the LRNG to be seeded and thus
+ * guaranteed to supply cryptographically secure random numbers.
+ *
+ * This applies to: the /dev/urandom device, the get_random_bytes function,
+ * and the get_random_{u32,u64,int,long} family of functions. Using any of
+ * these functions without first calling this function forfeits the guarantee
+ * of security.
+ *
+ * Return:
+ * * 0 if the LRNG has been seeded.
+ * * -ERESTARTSYS if the function was interrupted by a signal.
+ */
+int wait_for_random_bytes(void)
+{
+	if (likely(lrng_state_min_seeded()))
+		return 0;
+	return wait_event_interruptible(lrng_init_wait,
+					lrng_state_min_seeded());
+}
+EXPORT_SYMBOL(wait_for_random_bytes);
+
+/*
+ * get_random_bytes_arch() - This function will use the architecture-specific
+ * hardware random number generator if it is available.
+ *
+ * The arch-specific hw RNG will almost certainly be faster than what we can
+ * do in software, but it is impossible to verify that it is implemented
+ * securely (as opposed to, say, the AES encryption of a sequence number using
+ * a key known by the NSA). So it's useful if we need the speed, but only if
+ * we're willing to trust the hardware manufacturer not to have put in a back
+ * door.
+ *
+ * @buf: buffer allocated by caller to store the random data in
+ * @nbytes: length of outbuf
+ *
+ * Return: number of bytes filled in.
+ */
+int __must_check get_random_bytes_arch(void *buf, int nbytes)
+{
+	u8 *p = buf;
+
+	while (nbytes) {
+		unsigned long v;
+		int chunk = min_t(int, nbytes, sizeof(unsigned long));
+
+		if (!arch_get_random_long(&v))
+			break;
+
+		memcpy(p, &v, chunk);
+		p += chunk;
+		nbytes -= chunk;
+	}
+
+	if (nbytes)
+		lrng_drng_get_atomic((u8 *)p, (u32)nbytes);
+
+	return nbytes;
+}
+EXPORT_SYMBOL(get_random_bytes_arch);
+
+/*
+ * Returns whether or not the LRNG has been seeded.
+ *
+ * Returns: true if the urandom pool has been seeded.
+ *          false if the urandom pool has not been seeded.
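+ *
+ * A caller that must not proceed with an unseeded RNG can combine this
+ * check with wait_for_random_bytes(), e.g. (a sketch):
+ *
+ *	if (!rng_is_initialized()) {
+ *		int ret = wait_for_random_bytes();
+ *
+ *		if (unlikely(ret))
+ *			return ret;
+ *	}
+ *
+ * Note that wait_for_random_bytes() waits for the minimal seed level,
+ * while this function reports the stricter operational state.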
+ */ +bool rng_is_initialized(void) +{ + return lrng_state_operational(); +} +EXPORT_SYMBOL(rng_is_initialized); + +/************************ LRNG user output interfaces *************************/ + +static ssize_t lrng_read_common(char __user *buf, size_t nbytes) +{ + ssize_t ret = 0; + u8 tmpbuf[LRNG_DRNG_BLOCKSIZE] __aligned(LRNG_KCAPI_ALIGN); + u8 *tmp_large = NULL, *tmp = tmpbuf; + u32 tmplen = sizeof(tmpbuf); + + if (nbytes == 0) + return 0; + + /* + * Satisfy large read requests -- as the common case are smaller + * request sizes, such as 16 or 32 bytes, avoid a kmalloc overhead for + * those by using the stack variable of tmpbuf. + */ + if (!CONFIG_BASE_SMALL && (nbytes > sizeof(tmpbuf))) { + tmplen = min_t(u32, nbytes, LRNG_DRNG_MAX_REQSIZE); + tmp_large = kmalloc(tmplen + LRNG_KCAPI_ALIGN, GFP_KERNEL); + if (!tmp_large) + tmplen = sizeof(tmpbuf); + else + tmp = PTR_ALIGN(tmp_large, LRNG_KCAPI_ALIGN); + } + + while (nbytes) { + u32 todo = min_t(u32, nbytes, tmplen); + int rc = 0; + + /* Reschedule if we received a large request. */ + if ((tmp_large) && need_resched()) { + if (signal_pending(current)) { + if (ret == 0) + ret = -ERESTARTSYS; + break; + } + schedule(); + } + + rc = lrng_drng_get_sleep(tmp, todo); + if (rc <= 0) { + if (rc < 0) + ret = rc; + break; + } + if (copy_to_user(buf, tmp, rc)) { + ret = -EFAULT; + break; + } + + nbytes -= rc; + buf += rc; + ret += rc; + } + + /* Wipe data just returned from memory */ + if (tmp_large) + kfree_sensitive(tmp_large); + else + memzero_explicit(tmpbuf, sizeof(tmpbuf)); + + return ret; +} + +static ssize_t +lrng_read_common_block(int nonblock, char __user *buf, size_t nbytes) +{ + if (nbytes == 0) + return 0; + + if (unlikely(!lrng_state_operational())) { + int ret; + + if (nonblock) + return -EAGAIN; + + ret = wait_event_interruptible(lrng_init_wait, + lrng_state_operational()); + if (unlikely(ret)) + return ret; + } + + return lrng_read_common(buf, nbytes); +} + +static ssize_t lrng_drng_read_block(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) +{ + return lrng_read_common_block(file->f_flags & O_NONBLOCK, buf, nbytes); +} + +static __poll_t lrng_random_poll(struct file *file, poll_table *wait) +{ + __poll_t mask; + + poll_wait(file, &lrng_init_wait, wait); + poll_wait(file, &lrng_write_wait, wait); + mask = 0; + if (lrng_state_operational()) + mask |= EPOLLIN | EPOLLRDNORM; + if (lrng_need_entropy() || + lrng_state_exseed_allow(lrng_noise_source_user)) { + lrng_state_exseed_set(lrng_noise_source_user, false); + mask |= EPOLLOUT | EPOLLWRNORM; + } + return mask; +} + +static ssize_t lrng_drng_write_common(const char __user *buffer, size_t count, + u32 entropy_bits) +{ + ssize_t ret = 0; + u8 buf[64] __aligned(LRNG_KCAPI_ALIGN); + const char __user *p = buffer; + u32 orig_entropy_bits = entropy_bits; + + if (!lrng_get_available()) + return -EAGAIN; + + count = min_t(size_t, count, INT_MAX); + while (count > 0) { + size_t bytes = min_t(size_t, count, sizeof(buf)); + u32 ent = min_t(u32, bytes<<3, entropy_bits); + + if (copy_from_user(&buf, p, bytes)) + return -EFAULT; + /* Inject data into entropy pool */ + lrng_pool_insert_aux(buf, bytes, ent); + + count -= bytes; + p += bytes; + ret += bytes; + entropy_bits -= ent; + + cond_resched(); + } + + /* Force reseed of DRNG during next data request. 
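+	 * A write without any entropy credit, e.g. a plain write to
+	 * /dev/random, shall still affect the next output, which the
+	 * forced reseed guarantees.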
*/ + if (!orig_entropy_bits) + lrng_drng_force_reseed(); + + return ret; +} + +static ssize_t lrng_drng_read(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) +{ + if (!lrng_state_min_seeded()) + pr_notice_ratelimited("%s - use of insufficiently seeded DRNG (%zu bytes read)\n", + current->comm, nbytes); + else if (!lrng_state_operational()) + pr_debug_ratelimited("%s - use of not fully seeded DRNG (%zu bytes read)\n", + current->comm, nbytes); + + return lrng_read_common(buf, nbytes); +} + +static ssize_t lrng_drng_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + return lrng_drng_write_common(buffer, count, 0); +} + +static long lrng_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + u32 digestsize_bits; + int size, ent_count_bits; + int __user *p = (int __user *)arg; + + switch (cmd) { + case RNDGETENTCNT: + ent_count_bits = lrng_avail_entropy(); + if (put_user(ent_count_bits, p)) + return -EFAULT; + return 0; + case RNDADDTOENTCNT: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (get_user(ent_count_bits, p)) + return -EFAULT; + ent_count_bits = (int)lrng_avail_aux_entropy() + ent_count_bits; + if (ent_count_bits < 0) + ent_count_bits = 0; + digestsize_bits = lrng_get_digestsize(); + if (ent_count_bits > digestsize_bits) + ent_count_bits = digestsize_bits; + lrng_pool_set_entropy(ent_count_bits); + return 0; + case RNDADDENTROPY: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (get_user(ent_count_bits, p++)) + return -EFAULT; + if (ent_count_bits < 0) + return -EINVAL; + if (get_user(size, p++)) + return -EFAULT; + if (size < 0) + return -EINVAL; + /* there cannot be more entropy than data */ + ent_count_bits = min(ent_count_bits, size<<3); + return lrng_drng_write_common((const char __user *)p, size, + ent_count_bits); + case RNDZAPENTCNT: + case RNDCLEARPOOL: + /* Clear the entropy pool counter. */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + lrng_pool_set_entropy(0); + return 0; + case RNDRESEEDCRNG: + /* + * We leave the capability check here since it is present + * in the upstream's RNG implementation. Yet, user space + * can trigger a reseed as easy as writing into /dev/random + * or /dev/urandom where no privilege is needed. + */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + /* Force a reseed of all DRNGs */ + lrng_drng_force_reseed(); + return 0; + default: + return -EINVAL; + } +} + +static int lrng_fasync(int fd, struct file *filp, int on) +{ + return fasync_helper(fd, filp, on, &fasync); +} + +const struct file_operations random_fops = { + .read = lrng_drng_read_block, + .write = lrng_drng_write, + .poll = lrng_random_poll, + .unlocked_ioctl = lrng_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .fasync = lrng_fasync, + .llseek = noop_llseek, +}; + +const struct file_operations urandom_fops = { + .read = lrng_drng_read, + .write = lrng_drng_write, + .unlocked_ioctl = lrng_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .fasync = lrng_fasync, + .llseek = noop_llseek, +}; + +SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, + unsigned int, flags) +{ + if (flags & ~(GRND_NONBLOCK|GRND_RANDOM|GRND_INSECURE)) + return -EINVAL; + + /* + * Requesting insecure and blocking randomness at the same time makes + * no sense. 
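+	 *
+	 * The flag handling below therefore amounts to:
+	 *	GRND_INSECURE:	never block, data may come from a DRNG
+	 *			that is not fully seeded
+	 *	GRND_NONBLOCK:	return -EAGAIN instead of blocking until
+	 *			the LRNG is operational
+	 *	no flags:	block until the LRNG is operational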
+ */ + if ((flags & + (GRND_INSECURE|GRND_RANDOM)) == (GRND_INSECURE|GRND_RANDOM)) + return -EINVAL; + + if (count > INT_MAX) + count = INT_MAX; + + if (flags & GRND_INSECURE) + return lrng_drng_read(NULL, buf, count, NULL); + + return lrng_read_common_block(flags & GRND_NONBLOCK, buf, count); +} diff --git a/drivers/char/lrng/lrng_internal.h b/drivers/char/lrng/lrng_internal.h new file mode 100644 index 0000000000000..c01c4e5fa44f5 --- /dev/null +++ b/drivers/char/lrng/lrng_internal.h @@ -0,0 +1,487 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2018 - 2021, Stephan Mueller + */ + +#ifndef _LRNG_INTERNAL_H +#define _LRNG_INTERNAL_H + +#include +#include +#include +#include +#include +#include +#include +#include + +/*************************** General LRNG parameter ***************************/ + +/* Security strength of LRNG -- this must match DRNG security strength */ +#define LRNG_DRNG_SECURITY_STRENGTH_BYTES 32 +#define LRNG_DRNG_SECURITY_STRENGTH_BITS (LRNG_DRNG_SECURITY_STRENGTH_BYTES * 8) +#define LRNG_DRNG_BLOCKSIZE 64 /* Maximum of DRNG block sizes */ +#define LRNG_DRNG_INIT_SEED_SIZE_BITS (LRNG_DRNG_SECURITY_STRENGTH_BITS + \ + CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS) +#define LRNG_DRNG_INIT_SEED_SIZE_BYTES (LRNG_DRNG_INIT_SEED_SIZE_BITS >> 3) + +/* + * SP800-90A defines a maximum request size of 1<<16 bytes. The given value is + * considered a safer margin. + * + * This value is allowed to be changed. + */ +#define LRNG_DRNG_MAX_REQSIZE (1<<12) + +/* + * SP800-90A defines a maximum number of requests between reseeds of 2^48. + * The given value is considered a much safer margin, balancing requests for + * frequent reseeds with the need to conserve entropy. This value MUST NOT be + * larger than INT_MAX because it is used in an atomic_t. + * + * This value is allowed to be changed. + */ +#define LRNG_DRNG_RESEED_THRESH (1<<20) + +/* + * Maximum DRNG generation operations without reseed having full entropy + * This value defines the absolute maximum value of DRNG generation operations + * without a reseed holding full entropy. LRNG_DRNG_RESEED_THRESH is the + * threshold when a new reseed is attempted. But it is possible that this fails + * to deliver full entropy. In this case the DRNG will continue to provide data + * even though it was not reseeded with full entropy. To avoid in the extreme + * case that no reseed is performed for too long, this threshold is enforced. + * If that absolute low value is reached, the LRNG is marked as not operational. + * + * This value is allowed to be changed. + */ +#define LRNG_DRNG_MAX_WITHOUT_RESEED (1<<30) + +/* + * Min required seed entropy is 128 bits covering the minimum entropy + * requirement of SP800-131A and the German BSI's TR02102. + * + * This value is allowed to be changed. + */ +#define LRNG_FULL_SEED_ENTROPY_BITS LRNG_DRNG_SECURITY_STRENGTH_BITS +#define LRNG_MIN_SEED_ENTROPY_BITS 128 +#define LRNG_INIT_ENTROPY_BITS 32 + +/* + * Wakeup value + * + * This value is allowed to be changed but must not be larger than the + * digest size of the hash operation used update the aux_pool. + */ +#ifdef CONFIG_CRYPTO_LIB_SHA256 +# define LRNG_ATOMIC_DIGEST_SIZE SHA256_DIGEST_SIZE +#else +# define LRNG_ATOMIC_DIGEST_SIZE SHA1_DIGEST_SIZE +#endif +#define LRNG_WRITE_WAKEUP_ENTROPY LRNG_ATOMIC_DIGEST_SIZE + +/* + * If the switching support is configured, we must provide support up to + * the largest digest size. Without switching support, we know it is only + * the built-in digest size. 
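+ * I.e. with switching support, the buffer must be able to hold a 512-bit
+ * (64-byte) digest such as SHA-512.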
+ */ +#ifdef CONFIG_LRNG_DRNG_SWITCH +# define LRNG_MAX_DIGESTSIZE 64 +#else +# define LRNG_MAX_DIGESTSIZE LRNG_ATOMIC_DIGEST_SIZE +#endif + +/* + * Oversampling factor of IRQ events to obtain + * LRNG_DRNG_SECURITY_STRENGTH_BYTES. This factor is used when a + * high-resolution time stamp is not available. In this case, jiffies and + * register contents are used to fill the entropy pool. These noise sources + * are much less entropic than the high-resolution timer. The entropy content + * is the entropy content assumed with LRNG_IRQ_ENTROPY_BITS divided by + * LRNG_IRQ_OVERSAMPLING_FACTOR. + * + * This value is allowed to be changed. + */ +#define LRNG_IRQ_OVERSAMPLING_FACTOR 10 + +/* Alignmask that is intended to be identical to CRYPTO_MINALIGN */ +#define LRNG_KCAPI_ALIGN ARCH_KMALLOC_MINALIGN + +/* + * This definition must provide a buffer that is equal to SHASH_DESC_ON_STACK + * as it will be casted into a struct shash_desc. + */ +#define LRNG_POOL_SIZE (sizeof(struct shash_desc) + HASH_MAX_DESCSIZE) + +/************************ Default DRNG implementation *************************/ + +extern struct chacha20_state chacha20; +extern const struct lrng_crypto_cb lrng_cc20_crypto_cb; +void lrng_cc20_init_state(struct chacha20_state *state); + +/********************************** /proc *************************************/ + +#ifdef CONFIG_SYSCTL +void lrng_pool_inc_numa_node(void); +void lrng_proc_update_max_write_thresh(u32 new_digestsize); +#else +static inline void lrng_pool_inc_numa_node(void) { } +static inline void lrng_proc_update_max_write_thresh(u32 new_digestsize) { } +#endif + +/****************************** LRNG interfaces *******************************/ + +extern u32 lrng_write_wakeup_bits; +extern int lrng_drng_reseed_max_time; + +void lrng_writer_wakeup(void); +void lrng_init_wakeup(void); +void lrng_debug_report_seedlevel(const char *name); +void lrng_process_ready_list(void); + +/* External interface to use of the switchable DRBG inside the kernel */ +void get_random_bytes_full(void *buf, int nbytes); + +/************************* Jitter RNG Entropy Source **************************/ + +#ifdef CONFIG_LRNG_JENT +u32 lrng_get_jent(u8 *outbuf, u32 requested_bits); +u32 lrng_jent_entropylevel(u32 requested_bits); +void lrng_jent_es_state(unsigned char *buf, size_t buflen); +#else /* CONFIG_LRNG_JENT */ +static inline u32 lrng_get_jent(u8 *outbuf, u32 requested_bits) { return 0; } +static inline u32 lrng_jent_entropylevel(u32 requested_bits) { return 0; } +static inline void lrng_jent_es_state(unsigned char *buf, size_t buflen) { } +#endif /* CONFIG_LRNG_JENT */ + +/************************** CPU-based Entropy Source **************************/ + +static inline u32 lrng_fast_noise_entropylevel(u32 ent_bits, u32 requested_bits) +{ + /* Obtain entropy statement */ + ent_bits = ent_bits * requested_bits / LRNG_DRNG_SECURITY_STRENGTH_BITS; + /* Cap entropy to buffer size in bits */ + ent_bits = min_t(u32, ent_bits, requested_bits); + return ent_bits; +} + +#ifdef CONFIG_LRNG_CPU +u32 lrng_get_arch(u8 *outbuf, u32 requested_bits); +u32 lrng_archrandom_entropylevel(u32 requested_bits); +void lrng_arch_es_state(unsigned char *buf, size_t buflen); +#else /* CONFIG_LRNG_CPU */ +static inline u32 lrng_get_arch(u8 *outbuf, u32 requested_bits) { return 0; } +static inline u32 lrng_archrandom_entropylevel(u32 requested_bits) { return 0; } +static inline void lrng_arch_es_state(unsigned char *buf, size_t buflen) { } +#endif /* CONFIG_LRNG_CPU */ + +/************************** Interrupt 
Entropy Source **************************/ + +#ifdef CONFIG_LRNG_IRQ +void lrng_pcpu_reset(void); +u32 lrng_pcpu_avail_pool_size(void); +u32 lrng_pcpu_avail_entropy(void); +int lrng_pcpu_switch_hash(int node, + const struct lrng_crypto_cb *new_cb, void *new_hash, + const struct lrng_crypto_cb *old_cb); +u32 lrng_pcpu_pool_hash(u8 *outbuf, u32 requested_bits, bool fully_seeded); +void lrng_pcpu_array_add_u32(u32 data); +u32 lrng_gcd_analyze(u32 *history, size_t nelem); +void lrng_irq_es_state(unsigned char *buf, size_t buflen); +#else /* CONFIG_LRNG_IRQ */ +static inline void lrng_pcpu_reset(void) { } +static inline u32 lrng_pcpu_avail_pool_size(void) { return 0; } +static inline u32 lrng_pcpu_avail_entropy(void) { return 0; } +static inline int lrng_pcpu_switch_hash(int node, + const struct lrng_crypto_cb *new_cb, void *new_hash, + const struct lrng_crypto_cb *old_cb) +{ + return 0; +} +static inline u32 lrng_pcpu_pool_hash(u8 *outbuf, u32 requested_bits, + bool fully_seeded) +{ + return 0; +} +static inline void lrng_pcpu_array_add_u32(u32 data) { } +static inline void lrng_irq_es_state(unsigned char *buf, size_t buflen) { } +#endif /* CONFIG_LRNG_IRQ */ + +/****************************** DRNG processing *******************************/ + +/* DRNG state handle */ +struct lrng_drng { + void *drng; /* DRNG handle */ + void *hash; /* Hash handle */ + const struct lrng_crypto_cb *crypto_cb; /* Crypto callbacks */ + atomic_t requests; /* Number of DRNG requests */ + atomic_t requests_since_fully_seeded; /* Number DRNG requests since + * last fully seeded + */ + unsigned long last_seeded; /* Last time it was seeded */ + bool fully_seeded; /* Is DRNG fully seeded? */ + bool force_reseed; /* Force a reseed */ + + /* Lock write operations on DRNG state, DRNG replacement of crypto_cb */ + struct mutex lock; + spinlock_t spin_lock; + /* Lock *hash replacement - always take before DRNG lock */ + rwlock_t hash_lock; +}; + +extern struct mutex lrng_crypto_cb_update; + +struct lrng_drng *lrng_drng_init_instance(void); +struct lrng_drng *lrng_drng_atomic_instance(void); + +static __always_inline bool lrng_drng_is_atomic(struct lrng_drng *drng) +{ + return (drng->drng == lrng_drng_atomic_instance()->drng); +} + +/* Lock the DRNG */ +static __always_inline void lrng_drng_lock(struct lrng_drng *drng, + unsigned long *flags) + __acquires(&drng->spin_lock) +{ + /* Use spin lock in case the atomic DRNG context is used */ + if (lrng_drng_is_atomic(drng)) { + spin_lock_irqsave(&drng->spin_lock, *flags); + + /* + * In case a lock transition happened while we were spinning, + * catch this case and use the new lock type. + */ + if (!lrng_drng_is_atomic(drng)) { + spin_unlock_irqrestore(&drng->spin_lock, *flags); + __acquire(&drng->spin_lock); + mutex_lock(&drng->lock); + } + } else { + __acquire(&drng->spin_lock); + mutex_lock(&drng->lock); + } +} + +/* Unlock the DRNG */ +static __always_inline void lrng_drng_unlock(struct lrng_drng *drng, + unsigned long *flags) + __releases(&drng->spin_lock) +{ + if (lrng_drng_is_atomic(drng)) { + spin_unlock_irqrestore(&drng->spin_lock, *flags); + } else { + mutex_unlock(&drng->lock); + __release(&drng->spin_lock); + } +} + +void lrng_reset(void); +void lrng_drngs_init_cc20(bool force_seed); +bool lrng_sp80090c_compliant(void); + +static inline u32 lrng_compress_osr(void) +{ + return lrng_sp80090c_compliant() ? 
CONFIG_LRNG_OVERSAMPLE_ES_BITS : 0; +} + +static inline u32 lrng_reduce_by_osr(u32 entropy_bits) +{ + u32 osr_bits = lrng_compress_osr(); + + return (entropy_bits >= osr_bits) ? (entropy_bits - osr_bits) : 0; +} + +bool lrng_get_available(void); +void lrng_set_available(void); +void lrng_drng_reset(struct lrng_drng *drng); +int lrng_drng_get_atomic(u8 *outbuf, u32 outbuflen); +int lrng_drng_get_sleep(u8 *outbuf, u32 outbuflen); +void lrng_drng_force_reseed(void); +void lrng_drng_seed_work(struct work_struct *dummy); + +#ifdef CONFIG_NUMA +struct lrng_drng **lrng_drng_instances(void); +void lrng_drngs_numa_alloc(void); +#else /* CONFIG_NUMA */ +static inline struct lrng_drng **lrng_drng_instances(void) { return NULL; } +static inline void lrng_drngs_numa_alloc(void) { return; } +#endif /* CONFIG_NUMA */ + +/************************* Entropy sources management *************************/ + +enum lrng_external_noise_source { + lrng_noise_source_hw, + lrng_noise_source_user +}; + +void lrng_set_entropy_thresh(u32 new); +u32 lrng_avail_entropy(void); +void lrng_reset_state(void); + +bool lrng_state_exseed_allow(enum lrng_external_noise_source source); +void lrng_state_exseed_set(enum lrng_external_noise_source source, bool type); +bool lrng_state_min_seeded(void); +bool lrng_state_fully_seeded(void); +bool lrng_state_operational(void); + +int lrng_pool_trylock(void); +void lrng_pool_unlock(void); +void lrng_pool_all_numa_nodes_seeded(bool set); +void lrng_pool_add_entropy(void); + +struct entropy_buf { + u8 a[LRNG_DRNG_INIT_SEED_SIZE_BYTES]; + u8 b[LRNG_DRNG_INIT_SEED_SIZE_BYTES]; + u8 c[LRNG_DRNG_INIT_SEED_SIZE_BYTES]; + u8 d[LRNG_DRNG_INIT_SEED_SIZE_BYTES]; + u32 now, a_bits, b_bits, c_bits, d_bits; +}; + +bool lrng_fully_seeded(bool fully_seeded, struct entropy_buf *eb); +void lrng_unset_fully_seeded(struct lrng_drng *drng); +void lrng_fill_seed_buffer(struct entropy_buf *entropy_buf, u32 requested_bits); +void lrng_init_ops(struct entropy_buf *eb); + +/*********************** Auxiliary Pool Entropy Source ************************/ + +u32 lrng_avail_aux_entropy(void); +void lrng_aux_es_state(unsigned char *buf, size_t buflen); +u32 lrng_get_digestsize(void); +void lrng_pool_set_entropy(u32 entropy_bits); +int lrng_aux_switch_hash(const struct lrng_crypto_cb *new_cb, void *new_hash, + const struct lrng_crypto_cb *old_cb); +int lrng_pool_insert_aux(const u8 *inbuf, u32 inbuflen, u32 entropy_bits); +void lrng_get_backtrack_aux(struct entropy_buf *entropy_buf, + u32 requested_bits); + +/* Obtain the security strength of the LRNG in bits */ +static inline u32 lrng_security_strength(void) +{ + /* + * We use a hash to read the entropy in the entropy pool. According to + * SP800-90B table 1, the entropy can be at most the digest size. + * Considering this together with the last sentence in section 3.1.5.1.2 + * the security strength of a (approved) hash is equal to its output + * size. On the other hand the entropy cannot be larger than the + * security strength of the used DRBG. 
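+	 *
+	 * The returned value therefore is, e.g., 256 bits with SHA-256 or
+	 * SHA-512 (capped by LRNG_FULL_SEED_ENTROPY_BITS), and 160 bits if
+	 * only the SHA-1 atomic hash is available.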
+ */ + return min_t(u32, LRNG_FULL_SEED_ENTROPY_BITS, lrng_get_digestsize()); +} + +static inline u32 lrng_get_seed_entropy_osr(bool fully_seeded) +{ + u32 requested_bits = lrng_security_strength(); + + /* Apply oversampling during initialization according to SP800-90C */ + if (lrng_sp80090c_compliant() && !fully_seeded) + requested_bits += CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS; + return requested_bits; +} + +/************************** Health Test linking code **************************/ + +enum lrng_health_res { + lrng_health_pass, /* Health test passes on time stamp */ + lrng_health_fail_use, /* Time stamp unhealthy, but mix in */ + lrng_health_fail_drop /* Time stamp unhealthy, drop it */ +}; + +#ifdef CONFIG_LRNG_HEALTH_TESTS +bool lrng_sp80090b_startup_complete(void); +bool lrng_sp80090b_compliant(void); + +enum lrng_health_res lrng_health_test(u32 now_time); +void lrng_health_disable(void); + +#else /* CONFIG_LRNG_HEALTH_TESTS */ +static inline bool lrng_sp80090b_startup_complete(void) { return true; } +static inline bool lrng_sp80090b_compliant(void) { return false; } + +static inline enum lrng_health_res +lrng_health_test(u32 now_time) { return lrng_health_pass; } +static inline void lrng_health_disable(void) { } +#endif /* CONFIG_LRNG_HEALTH_TESTS */ + +/****************************** Helper code ***********************************/ + +static inline u32 atomic_read_u32(atomic_t *v) +{ + return (u32)atomic_read(v); +} + +/******************** Crypto Primitive Switching Support **********************/ + +#ifdef CONFIG_LRNG_DRNG_SWITCH +static inline void lrng_hash_lock(struct lrng_drng *drng, unsigned long *flags) +{ + read_lock_irqsave(&drng->hash_lock, *flags); +} + +static inline void lrng_hash_unlock(struct lrng_drng *drng, unsigned long flags) +{ + read_unlock_irqrestore(&drng->hash_lock, flags); +} +#else /* CONFIG_LRNG_DRNG_SWITCH */ +static inline void lrng_hash_lock(struct lrng_drng *drng, unsigned long *flags) +{ } + +static inline void lrng_hash_unlock(struct lrng_drng *drng, unsigned long flags) +{ } +#endif /* CONFIG_LRNG_DRNG_SWITCH */ + +/*************************** Auxiliary functions ******************************/ + +void invalidate_batched_entropy(void); + +/***************************** Testing code ***********************************/ + +#ifdef CONFIG_LRNG_RAW_HIRES_ENTROPY +bool lrng_raw_hires_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_HIRES_ENTROPY */ +static inline bool lrng_raw_hires_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_HIRES_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_JIFFIES_ENTROPY +bool lrng_raw_jiffies_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_JIFFIES_ENTROPY */ +static inline bool lrng_raw_jiffies_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_JIFFIES_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_IRQ_ENTROPY +bool lrng_raw_irq_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_IRQ_ENTROPY */ +static inline bool lrng_raw_irq_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_IRQ_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY +bool lrng_raw_irqflags_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY */ +static inline bool lrng_raw_irqflags_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_RETIP_ENTROPY +bool lrng_raw_retip_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_RETIP_ENTROPY */ +static inline bool lrng_raw_retip_entropy_store(u32 value) { return false; } +#endif /* 
CONFIG_LRNG_RAW_RETIP_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_REGS_ENTROPY +bool lrng_raw_regs_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_REGS_ENTROPY */ +static inline bool lrng_raw_regs_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_REGS_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_ARRAY +bool lrng_raw_array_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_ARRAY */ +static inline bool lrng_raw_array_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_ARRAY */ + +#ifdef CONFIG_LRNG_IRQ_PERF +bool lrng_perf_time(u32 start); +#else /* CONFIG_LRNG_IRQ_PERF */ +static inline bool lrng_perf_time(u32 start) { return false; } +#endif /*CONFIG_LRNG_IRQ_PERF */ + +#endif /* _LRNG_INTERNAL_H */ diff --git a/drivers/char/lrng/lrng_kcapi.c b/drivers/char/lrng/lrng_kcapi.c new file mode 100644 index 0000000000000..fa76b2d57b7f1 --- /dev/null +++ b/drivers/char/lrng/lrng_kcapi.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Backend for the LRNG providing the cryptographic primitives using the + * kernel crypto API. + * + * Copyright (C) 2018 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include "lrng_kcapi_hash.h" + +static char *drng_name = NULL; +module_param(drng_name, charp, 0444); +MODULE_PARM_DESC(drng_name, "Kernel crypto API name of DRNG"); + +static char *pool_hash = "sha512"; +module_param(pool_hash, charp, 0444); +MODULE_PARM_DESC(pool_hash, + "Kernel crypto API name of hash or keyed message digest to read the entropy pool"); + +static char *seed_hash = NULL; +module_param(seed_hash, charp, 0444); +MODULE_PARM_DESC(seed_hash, + "Kernel crypto API name of hash with output size equal to seedsize of DRNG to bring seed string to the size required by the DRNG"); + +struct lrng_drng_info { + struct crypto_rng *kcapi_rng; + void *lrng_hash; +}; + +static void *lrng_kcapi_drng_hash_alloc(void) +{ + return lrng_kcapi_hash_alloc(pool_hash); +} + +static int lrng_kcapi_drng_seed_helper(void *drng, const u8 *inbuf, + u32 inbuflen) +{ + SHASH_DESC_ON_STACK(shash, NULL); + struct lrng_drng_info *lrng_drng_info = (struct lrng_drng_info *)drng; + struct crypto_rng *kcapi_rng = lrng_drng_info->kcapi_rng; + void *hash = lrng_drng_info->lrng_hash; + u32 digestsize = lrng_kcapi_hash_digestsize(hash); + u8 digest[HASH_MAX_DIGESTSIZE] __aligned(8); + int ret; + + if (!hash) + return crypto_rng_reset(kcapi_rng, inbuf, inbuflen); + + ret = lrng_kcapi_hash_init(shash, hash) ?: + lrng_kcapi_hash_update(shash, inbuf, inbuflen) ?: + lrng_kcapi_hash_final(shash, digest); + lrng_kcapi_hash_zero(shash); + if (ret) + return ret; + + ret = crypto_rng_reset(kcapi_rng, digest, digestsize); + if (ret) + return ret; + + memzero_explicit(digest, digestsize); + return 0; +} + +static int lrng_kcapi_drng_generate_helper(void *drng, u8 *outbuf, + u32 outbuflen) +{ + struct lrng_drng_info *lrng_drng_info = (struct lrng_drng_info *)drng; + struct crypto_rng *kcapi_rng = lrng_drng_info->kcapi_rng; + int ret = crypto_rng_get_bytes(kcapi_rng, outbuf, outbuflen); + + if (ret < 0) + return ret; + + return outbuflen; +} + +static void *lrng_kcapi_drng_alloc(u32 sec_strength) +{ + struct lrng_drng_info *lrng_drng_info; + struct crypto_rng *kcapi_rng; + int seedsize; + void *ret = ERR_PTR(-ENOMEM); + + if (!drng_name) { + pr_err("DRNG name missing\n"); + return ERR_PTR(-EINVAL); + } + + if (!memcmp(drng_name, "drbg", 4) || + !memcmp(drng_name, "stdrng", 6) || + !memcmp(drng_name, 
"jitterentropy_rng", 17)) { + pr_err("Refusing to load the requested random number generator\n"); + return ERR_PTR(-EINVAL); + } + + lrng_drng_info = kmalloc(sizeof(*lrng_drng_info), GFP_KERNEL); + if (!lrng_drng_info) + return ERR_PTR(-ENOMEM); + + kcapi_rng = crypto_alloc_rng(drng_name, 0, 0); + if (IS_ERR(kcapi_rng)) { + pr_err("DRNG %s cannot be allocated\n", drng_name); + ret = ERR_CAST(kcapi_rng); + goto free; + } + lrng_drng_info->kcapi_rng = kcapi_rng; + + seedsize = crypto_rng_seedsize(kcapi_rng); + + if (sec_strength > seedsize) + pr_info("Seedsize DRNG (%u bits) lower than security strength of LRNG noise source (%u bits)\n", + crypto_rng_seedsize(kcapi_rng) * 8, sec_strength * 8); + + if (seedsize) { + void *lrng_hash; + + if (!seed_hash) { + switch (seedsize) { + case 32: + seed_hash = "sha256"; + break; + case 48: + seed_hash = "sha384"; + break; + case 64: + seed_hash = "sha512"; + break; + default: + pr_err("Seed size %d cannot be processed\n", + seedsize); + goto dealloc; + } + } + + lrng_hash = lrng_kcapi_hash_alloc(seed_hash); + if (IS_ERR(lrng_hash)) { + ret = ERR_CAST(lrng_hash); + goto dealloc; + } + + if (seedsize != lrng_kcapi_hash_digestsize(lrng_hash)) { + pr_err("Seed hash output size not equal to DRNG seed size\n"); + lrng_kcapi_hash_dealloc(lrng_hash); + ret = ERR_PTR(-EINVAL); + goto dealloc; + } + + lrng_drng_info->lrng_hash = lrng_hash; + + pr_info("Seed hash %s allocated\n", seed_hash); + } else { + lrng_drng_info->lrng_hash = NULL; + } + + pr_info("Kernel crypto API DRNG %s allocated\n", drng_name); + + return lrng_drng_info; + +dealloc: + crypto_free_rng(kcapi_rng); +free: + kfree(lrng_drng_info); + return ret; +} + +static void lrng_kcapi_drng_dealloc(void *drng) +{ + struct lrng_drng_info *lrng_drng_info = (struct lrng_drng_info *)drng; + struct crypto_rng *kcapi_rng = lrng_drng_info->kcapi_rng; + + crypto_free_rng(kcapi_rng); + if (lrng_drng_info->lrng_hash) + lrng_kcapi_hash_dealloc(lrng_drng_info->lrng_hash); + kfree(lrng_drng_info); + pr_info("DRNG %s deallocated\n", drng_name); +} + +static const char *lrng_kcapi_drng_name(void) +{ + return drng_name; +} + +static const char *lrng_kcapi_pool_hash(void) +{ + return pool_hash; +} + +static const struct lrng_crypto_cb lrng_kcapi_crypto_cb = { + .lrng_drng_name = lrng_kcapi_drng_name, + .lrng_hash_name = lrng_kcapi_pool_hash, + .lrng_drng_alloc = lrng_kcapi_drng_alloc, + .lrng_drng_dealloc = lrng_kcapi_drng_dealloc, + .lrng_drng_seed_helper = lrng_kcapi_drng_seed_helper, + .lrng_drng_generate_helper = lrng_kcapi_drng_generate_helper, + .lrng_hash_alloc = lrng_kcapi_drng_hash_alloc, + .lrng_hash_dealloc = lrng_kcapi_hash_dealloc, + .lrng_hash_digestsize = lrng_kcapi_hash_digestsize, + .lrng_hash_init = lrng_kcapi_hash_init, + .lrng_hash_update = lrng_kcapi_hash_update, + .lrng_hash_final = lrng_kcapi_hash_final, + .lrng_hash_desc_zero = lrng_kcapi_hash_zero, +}; + +static int __init lrng_kcapi_init(void) +{ + return lrng_set_drng_cb(&lrng_kcapi_crypto_cb); +} +static void __exit lrng_kcapi_exit(void) +{ + lrng_set_drng_cb(NULL); +} + +late_initcall(lrng_kcapi_init); +module_exit(lrng_kcapi_exit); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller "); +MODULE_DESCRIPTION("Linux Random Number Generator - kernel crypto API DRNG backend"); diff --git a/drivers/char/lrng/lrng_kcapi_hash.c b/drivers/char/lrng/lrng_kcapi_hash.c new file mode 100644 index 0000000000000..9927e1022de59 --- /dev/null +++ b/drivers/char/lrng/lrng_kcapi_hash.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 OR 
BSD-2-Clause +/* + * Backend for providing the hash primitive using the kernel crypto API. + * + * Copyright (C) 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include + +#include "lrng_kcapi_hash.h" + +struct lrng_hash_info { + struct crypto_shash *tfm; +}; + +static void _lrng_kcapi_hash_free(struct lrng_hash_info *lrng_hash) +{ + struct crypto_shash *tfm = lrng_hash->tfm; + + crypto_free_shash(tfm); + kfree(lrng_hash); +} + +void *lrng_kcapi_hash_alloc(const char *name) +{ + struct lrng_hash_info *lrng_hash; + struct crypto_shash *tfm; + int ret; + + if (!name) { + pr_err("Hash name missing\n"); + return ERR_PTR(-EINVAL); + } + + tfm = crypto_alloc_shash(name, 0, 0); + if (IS_ERR(tfm)) { + pr_err("could not allocate hash %s\n", name); + return ERR_CAST(tfm); + } + + ret = sizeof(struct lrng_hash_info); + lrng_hash = kmalloc(ret, GFP_KERNEL); + if (!lrng_hash) { + crypto_free_shash(tfm); + return ERR_PTR(-ENOMEM); + } + + lrng_hash->tfm = tfm; + + pr_info("Hash %s allocated\n", name); + + return lrng_hash; +} +EXPORT_SYMBOL(lrng_kcapi_hash_alloc); + +u32 lrng_kcapi_hash_digestsize(void *hash) +{ + struct lrng_hash_info *lrng_hash = (struct lrng_hash_info *)hash; + struct crypto_shash *tfm = lrng_hash->tfm; + + return crypto_shash_digestsize(tfm); +} +EXPORT_SYMBOL(lrng_kcapi_hash_digestsize); + +void lrng_kcapi_hash_dealloc(void *hash) +{ + struct lrng_hash_info *lrng_hash = (struct lrng_hash_info *)hash; + + _lrng_kcapi_hash_free(lrng_hash); + pr_info("Hash deallocated\n"); +} +EXPORT_SYMBOL(lrng_kcapi_hash_dealloc); + +int lrng_kcapi_hash_init(struct shash_desc *shash, void *hash) +{ + struct lrng_hash_info *lrng_hash = (struct lrng_hash_info *)hash; + struct crypto_shash *tfm = lrng_hash->tfm; + + shash->tfm = tfm; + return crypto_shash_init(shash); +} +EXPORT_SYMBOL(lrng_kcapi_hash_init); + +int lrng_kcapi_hash_update(struct shash_desc *shash, const u8 *inbuf, + u32 inbuflen) +{ + return crypto_shash_update(shash, inbuf, inbuflen); +} +EXPORT_SYMBOL(lrng_kcapi_hash_update); + +int lrng_kcapi_hash_final(struct shash_desc *shash, u8 *digest) +{ + return crypto_shash_final(shash, digest); +} +EXPORT_SYMBOL(lrng_kcapi_hash_final); + +void lrng_kcapi_hash_zero(struct shash_desc *shash) +{ + shash_desc_zero(shash); +} +EXPORT_SYMBOL(lrng_kcapi_hash_zero); diff --git a/drivers/char/lrng/lrng_kcapi_hash.h b/drivers/char/lrng/lrng_kcapi_hash.h new file mode 100644 index 0000000000000..2f94558d2dd64 --- /dev/null +++ b/drivers/char/lrng/lrng_kcapi_hash.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2020 - 2021, Stephan Mueller + */ + +#ifndef _LRNG_KCAPI_HASH_H +#define _LRNG_KCAPI_HASH_H + +#include + +void *lrng_kcapi_hash_alloc(const char *name); +u32 lrng_kcapi_hash_digestsize(void *hash); +void lrng_kcapi_hash_dealloc(void *hash); +int lrng_kcapi_hash_init(struct shash_desc *shash, void *hash); +int lrng_kcapi_hash_update(struct shash_desc *shash, const u8 *inbuf, + u32 inbuflen); +int lrng_kcapi_hash_final(struct shash_desc *shash, u8 *digest); +void lrng_kcapi_hash_zero(struct shash_desc *shash); + +#endif /* _LRNG_KCAPI_HASH_H */ diff --git a/drivers/char/lrng/lrng_numa.c b/drivers/char/lrng/lrng_numa.c new file mode 100644 index 0000000000000..094929107c462 --- /dev/null +++ b/drivers/char/lrng/lrng_numa.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG NUMA support + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + 
+#include +#include + +#include "lrng_internal.h" + +static struct lrng_drng **lrng_drng __read_mostly = NULL; + +struct lrng_drng **lrng_drng_instances(void) +{ + /* counterpart to cmpxchg_release in _lrng_drngs_numa_alloc */ + return smp_load_acquire(&lrng_drng); +} + +/* Allocate the data structures for the per-NUMA node DRNGs */ +static void _lrng_drngs_numa_alloc(struct work_struct *work) +{ + struct lrng_drng **drngs; + struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + u32 node; + bool init_drng_used = false; + + mutex_lock(&lrng_crypto_cb_update); + + /* per-NUMA-node DRNGs are already present */ + if (lrng_drng) + goto unlock; + + drngs = kcalloc(nr_node_ids, sizeof(void *), GFP_KERNEL|__GFP_NOFAIL); + for_each_online_node(node) { + struct lrng_drng *drng; + + if (!init_drng_used) { + drngs[node] = lrng_drng_init; + init_drng_used = true; + continue; + } + + drng = kmalloc_node(sizeof(struct lrng_drng), + GFP_KERNEL|__GFP_NOFAIL, node); + memset(drng, 0, sizeof(*drng)); + + drng->crypto_cb = lrng_drng_init->crypto_cb; + drng->drng = drng->crypto_cb->lrng_drng_alloc( + LRNG_DRNG_SECURITY_STRENGTH_BYTES); + if (IS_ERR(drng->drng)) { + kfree(drng); + goto err; + } + + drng->hash = drng->crypto_cb->lrng_hash_alloc(); + if (IS_ERR(drng->hash)) { + drng->crypto_cb->lrng_drng_dealloc(drng->drng); + kfree(drng); + goto err; + } + + mutex_init(&drng->lock); + spin_lock_init(&drng->spin_lock); + rwlock_init(&drng->hash_lock); + + /* + * Switch the hash used by the per-CPU pool. + * We do not need to lock the new hash as it is not usable yet + * due to **drngs not yet being initialized. + */ + if (lrng_pcpu_switch_hash(node, drng->crypto_cb, drng->hash, + &lrng_cc20_crypto_cb)) + goto err; + + /* + * No reseeding of NUMA DRNGs from previous DRNGs as this + * would complicate the code. Let it simply reseed. + */ + lrng_drng_reset(drng); + drngs[node] = drng; + + lrng_pool_inc_numa_node(); + pr_info("DRNG and entropy pool read hash for NUMA node %d allocated\n", + node); + } + + /* counterpart to smp_load_acquire in lrng_drng_instances */ + if (!cmpxchg_release(&lrng_drng, NULL, drngs)) { + lrng_pool_all_numa_nodes_seeded(false); + goto unlock; + } + +err: + for_each_online_node(node) { + struct lrng_drng *drng = drngs[node]; + + if (drng == lrng_drng_init) + continue; + + if (drng) { + lrng_pcpu_switch_hash(node, &lrng_cc20_crypto_cb, NULL, + drng->crypto_cb); + drng->crypto_cb->lrng_hash_dealloc(drng->hash); + drng->crypto_cb->lrng_drng_dealloc(drng->drng); + kfree(drng); + } + } + kfree(drngs); + +unlock: + mutex_unlock(&lrng_crypto_cb_update); +} + +static DECLARE_WORK(lrng_drngs_numa_alloc_work, _lrng_drngs_numa_alloc); + +void lrng_drngs_numa_alloc(void) +{ + schedule_work(&lrng_drngs_numa_alloc_work); +} diff --git a/drivers/char/lrng/lrng_proc.c b/drivers/char/lrng/lrng_proc.c new file mode 100644 index 0000000000000..5c7b4908fb0c2 --- /dev/null +++ b/drivers/char/lrng/lrng_proc.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG proc and sysctl interfaces + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#include +#include +#include +#include +#include + +#include "lrng_internal.h" + +/* + * This function is used to return both the bootid UUID, and random + * UUID. The difference is in whether table->data is NULL; if it is, + * then a new UUID is generated and returned to the user. 
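+ * For the boot id, by contrast, the UUID is generated once on first + * access and cached in table->data, so later reads return the same value.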
+ * + * If the user accesses this via the proc interface, the UUID will be + * returned as an ASCII string in the standard UUID format; if via the + * sysctl system call, as 16 bytes of binary data. + */ +static int lrng_proc_do_uuid(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table fake_table; + unsigned char buf[64], tmp_uuid[16], *uuid; + + uuid = table->data; + if (!uuid) { + uuid = tmp_uuid; + generate_random_uuid(uuid); + } else { + static DEFINE_SPINLOCK(bootid_spinlock); + + spin_lock(&bootid_spinlock); + if (!uuid[8]) + generate_random_uuid(uuid); + spin_unlock(&bootid_spinlock); + } + + sprintf(buf, "%pU", uuid); + + fake_table.data = buf; + fake_table.maxlen = sizeof(buf); + + return proc_dostring(&fake_table, write, buffer, lenp, ppos); +} + +static int lrng_proc_do_entropy(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table fake_table; + int entropy_count; + + entropy_count = lrng_avail_entropy(); + + fake_table.data = &entropy_count; + fake_table.maxlen = sizeof(entropy_count); + + return proc_dointvec(&fake_table, write, buffer, lenp, ppos); +} + +static int lrng_proc_do_poolsize(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table fake_table; + int entropy_count; + + /* LRNG can at most retain entropy in per-CPU pools and aux pool */ + entropy_count = lrng_get_digestsize() + lrng_pcpu_avail_pool_size(); + + fake_table.data = &entropy_count; + fake_table.maxlen = sizeof(entropy_count); + + return proc_dointvec(&fake_table, write, buffer, lenp, ppos); +} + +static int lrng_min_write_thresh; +static int lrng_max_write_thresh = (LRNG_WRITE_WAKEUP_ENTROPY << 3); +static char lrng_sysctl_bootid[16]; +static int lrng_drng_reseed_max_min; + +void lrng_proc_update_max_write_thresh(u32 new_digestsize) +{ + lrng_max_write_thresh = (int)new_digestsize; + /* Ensure that changes to the global variable are visible */ + mb(); +} + +static struct ctl_table random_table[] = { + { + .procname = "poolsize", + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = lrng_proc_do_poolsize, + }, + { + .procname = "entropy_avail", + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = lrng_proc_do_entropy, + }, + { + .procname = "write_wakeup_threshold", + .data = &lrng_write_wakeup_bits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &lrng_min_write_thresh, + .extra2 = &lrng_max_write_thresh, + }, + { + .procname = "boot_id", + .data = &lrng_sysctl_bootid, + .maxlen = 16, + .mode = 0444, + .proc_handler = lrng_proc_do_uuid, + }, + { + .procname = "uuid", + .maxlen = 16, + .mode = 0444, + .proc_handler = lrng_proc_do_uuid, + }, + { + .procname = "urandom_min_reseed_secs", + .data = &lrng_drng_reseed_max_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = &lrng_drng_reseed_max_min, + }, + { } +}; + +/* Number of online DRNGs */ +static u32 numa_drngs = 1; + +void lrng_pool_inc_numa_node(void) +{ + numa_drngs++; +} + +static int lrng_proc_type_show(struct seq_file *m, void *v) +{ + struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + unsigned long flags = 0; + unsigned char buf[250], irq[200], aux[100], cpu[90], jent[45]; + + lrng_drng_lock(lrng_drng_init, &flags); + snprintf(buf, sizeof(buf), + "DRNG name: %s\n" + "LRNG security strength in bits: %d\n" + "number of DRNG instances: %u\n" + "Standards compliance: %s\n" + "Entropy Sources: 
%s%s%sAuxiliary\n" + "LRNG minimally seeded: %s\n" + "LRNG fully seeded: %s\n", + lrng_drng_init->crypto_cb->lrng_drng_name(), + lrng_security_strength(), + numa_drngs, + lrng_sp80090c_compliant() ? "SP800-90C " : "", + IS_ENABLED(CONFIG_LRNG_IRQ) ? "IRQ " : "", + IS_ENABLED(CONFIG_LRNG_JENT) ? "JitterRNG " : "", + IS_ENABLED(CONFIG_LRNG_CPU) ? "CPU " : "", + lrng_state_min_seeded() ? "true" : "false", + lrng_state_fully_seeded() ? "true" : "false"); + + lrng_aux_es_state(aux, sizeof(aux)); + + irq[0] = '\0'; + lrng_irq_es_state(irq, sizeof(irq)); + + jent[0] = '\0'; + lrng_jent_es_state(jent, sizeof(jent)); + + cpu[0] = '\0'; + lrng_arch_es_state(cpu, sizeof(cpu)); + + lrng_drng_unlock(lrng_drng_init, &flags); + + seq_write(m, buf, strlen(buf)); + seq_write(m, aux, strlen(aux)); + seq_write(m, irq, strlen(irq)); + seq_write(m, jent, strlen(jent)); + seq_write(m, cpu, strlen(cpu)); + + return 0; +} + +/* + * rand_initialize() is called before sysctl_init(), + * so we cannot call register_sysctl_init() in rand_initialize() + */ +static int __init random_sysctls_init(void) +{ + register_sysctl_init("kernel/random", random_table); + return 0; +} +device_initcall(random_sysctls_init); + +static int __init lrng_proc_type_init(void) +{ + proc_create_single("lrng_type", 0444, NULL, &lrng_proc_type_show); + return 0; +} +module_init(lrng_proc_type_init); diff --git a/drivers/char/lrng/lrng_selftest.c b/drivers/char/lrng/lrng_selftest.c new file mode 100644 index 0000000000000..2e81047fe6a78 --- /dev/null +++ b/drivers/char/lrng/lrng_selftest.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG power-on and on-demand self-test + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +/* + * In addition to the self-tests below, the following LRNG components + * are covered with self-tests during regular operation: + * + * * power-on self-test: SP800-90A DRBG provided by the Linux kernel crypto API + * * power-on self-test: PRNG provided by the Linux kernel crypto API + * * runtime test: Raw noise source data testing including SP800-90B compliant + * tests when enabling CONFIG_LRNG_HEALTH_TESTS + * + * Additional developer tests present with LRNG code: + * * SP800-90B APT and RCT test enforcement validation when enabling + * CONFIG_LRNG_APT_BROKEN or CONFIG_LRNG_RCT_BROKEN. + * * Collection of raw entropy from the interrupt noise source when enabling + * CONFIG_LRNG_TESTING and pulling the data from the kernel with the provided + * interface. 
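+ * + * As a sketch (the debugfs directory name derives from KBUILD_MODNAME + * and may differ in a given build), 1000 raw u32 samples for an + * SP800-90B assessment could be pulled with: + * + * dd if=/sys/kernel/debug/lrng_testing/lrng_raw_hires of=raw.bin bs=4 count=1000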
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include "lrng_chacha20.h" +#include "lrng_internal.h" + +#define LRNG_SELFTEST_PASSED 0 +#define LRNG_SEFLTEST_ERROR_TIME (1 << 0) +#define LRNG_SEFLTEST_ERROR_CHACHA20 (1 << 1) +#define LRNG_SEFLTEST_ERROR_HASH (1 << 2) +#define LRNG_SEFLTEST_ERROR_GCD (1 << 3) +#define LRNG_SELFTEST_NOT_EXECUTED 0xffffffff + +#ifdef CONFIG_LRNG_IRQ + +#include "lrng_es_irq.h" + +static u32 lrng_data_selftest_ptr = 0; +static u32 lrng_data_selftest[LRNG_DATA_ARRAY_SIZE]; + +static void lrng_data_process_selftest_insert(u32 time) +{ + u32 ptr = lrng_data_selftest_ptr++ & LRNG_DATA_WORD_MASK; + unsigned int array = lrng_data_idx2array(ptr); + unsigned int slot = lrng_data_idx2slot(ptr); + + /* zeroization of slot to ensure the following OR adds the data */ + lrng_data_selftest[array] &= + ~(lrng_data_slot_val(0xffffffff & LRNG_DATA_SLOTSIZE_MASK, + slot)); + lrng_data_selftest[array] |= + lrng_data_slot_val(time & LRNG_DATA_SLOTSIZE_MASK, slot); +} + +static void lrng_data_process_selftest_u32(u32 data) +{ + u32 pre_ptr, ptr, mask; + unsigned int pre_array; + + /* Increment pointer by number of slots taken for input value */ + lrng_data_selftest_ptr += LRNG_DATA_SLOTS_PER_UINT; + + /* ptr to current unit */ + ptr = lrng_data_selftest_ptr; + + lrng_pcpu_split_u32(&ptr, &pre_ptr, &mask); + + /* MSB of data go into previous unit */ + pre_array = lrng_data_idx2array(pre_ptr); + /* zeroization of slot to ensure the following OR adds the data */ + lrng_data_selftest[pre_array] &= ~(0xffffffff & ~mask); + lrng_data_selftest[pre_array] |= data & ~mask; + + /* LSB of data go into current unit */ + lrng_data_selftest[lrng_data_idx2array(ptr)] = data & mask; +} + +static unsigned int lrng_data_process_selftest(void) +{ + u32 time; + u32 idx_zero_compare = (0 << 0) | (1 << 8) | (2 << 16) | (3 << 24); + u32 idx_one_compare = (4 << 0) | (5 << 8) | (6 << 16) | (7 << 24); + u32 idx_last_compare = + (((LRNG_DATA_NUM_VALUES - 4) & LRNG_DATA_SLOTSIZE_MASK) << 0) | + (((LRNG_DATA_NUM_VALUES - 3) & LRNG_DATA_SLOTSIZE_MASK) << 8) | + (((LRNG_DATA_NUM_VALUES - 2) & LRNG_DATA_SLOTSIZE_MASK) << 16) | + (((LRNG_DATA_NUM_VALUES - 1) & LRNG_DATA_SLOTSIZE_MASK) << 24); + + (void)idx_one_compare; + + /* "poison" the array to verify the operation of the zeroization */ + lrng_data_selftest[0] = 0xffffffff; + lrng_data_selftest[1] = 0xffffffff; + + lrng_data_process_selftest_insert(0); + /* + * Note, when using lrng_data_process_u32() on unaligned ptr, + * the first slots will go into next word, and the last slots go + * into the previous word. 
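+ * In the test below, ptr is 1 after the first insert, so the value 4 in + * the first slot of the u32 lands in slot 0 of word 1, while 1, 2 and 3 + * fill slots 1-3 of word 0; together with the inserts of 5, 6 and 7 this + * yields idx_zero_compare and idx_one_compare.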
+ */ + lrng_data_process_selftest_u32((4 << 0) | (1 << 8) | (2 << 16) | + (3 << 24)); + lrng_data_process_selftest_insert(5); + lrng_data_process_selftest_insert(6); + lrng_data_process_selftest_insert(7); + + if ((lrng_data_selftest[0] != idx_zero_compare) || + (lrng_data_selftest[1] != idx_one_compare)) + goto err; + + /* Reset for next test */ + lrng_data_selftest[0] = 0; + lrng_data_selftest[1] = 0; + lrng_data_selftest_ptr = 0; + + for (time = 0; time < LRNG_DATA_NUM_VALUES; time++) + lrng_data_process_selftest_insert(time); + + if ((lrng_data_selftest[0] != idx_zero_compare) || + (lrng_data_selftest[1] != idx_one_compare) || + (lrng_data_selftest[LRNG_DATA_ARRAY_SIZE - 1] != idx_last_compare)) + goto err; + + return LRNG_SELFTEST_PASSED; + +err: + pr_err("LRNG data array self-test FAILED\n"); + return LRNG_SEFLTEST_ERROR_TIME; +} + +static unsigned int lrng_gcd_selftest(void) +{ + u32 history[10]; + unsigned int i; + +#define LRNG_GCD_SELFTEST 3 + for (i = 0; i < ARRAY_SIZE(history); i++) + history[i] = i * LRNG_GCD_SELFTEST; + + if (lrng_gcd_analyze(history, ARRAY_SIZE(history)) == LRNG_GCD_SELFTEST) + return LRNG_SELFTEST_PASSED; + + pr_err("LRNG GCD self-test FAILED\n"); + return LRNG_SEFLTEST_ERROR_GCD; +} + +#else /* CONFIG_LRNG_IRQ */ + +static unsigned int lrng_data_process_selftest(void) +{ + return LRNG_SELFTEST_PASSED; +} + +static unsigned int lrng_gcd_selftest(void) +{ + return LRNG_SELFTEST_PASSED; +} + +#endif /* CONFIG_LRNG_IRQ */ + +static void lrng_selftest_bswap32(u32 *ptr, u32 words) +{ + u32 i; + + /* Byte-swap data which is an LE representation */ + for (i = 0; i < words; i++) { + __le32 *p = (__le32 *)ptr; + + *p = cpu_to_le32(*ptr); + ptr++; + } +} + +/* The test vectors are taken from crypto/testmgr.h */ +static unsigned int lrng_hash_selftest(void) +{ + SHASH_DESC_ON_STACK(shash, NULL); + const struct lrng_crypto_cb *crypto_cb = &lrng_cc20_crypto_cb; + static const u8 lrng_hash_selftest_result[] = +#ifdef CONFIG_CRYPTO_LIB_SHA256 + { 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, + 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23, + 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, + 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad }; +#else /* CONFIG_CRYPTO_LIB_SHA256 */ + { 0xa9, 0x99, 0x3e, 0x36, 0x47, 0x06, 0x81, 0x6a, 0xba, 0x3e, + 0x25, 0x71, 0x78, 0x50, 0xc2, 0x6c, 0x9c, 0xd0, 0xd8, 0x9d }; +#endif /* CONFIG_CRYPTO_LIB_SHA256 */ + static const u8 hash_input[] = { 0x61, 0x62, 0x63 }; /* "abc" */ + u8 digest[sizeof(lrng_hash_selftest_result)] __aligned(sizeof(u32)); + + if (sizeof(digest) != crypto_cb->lrng_hash_digestsize(NULL)) + return LRNG_SEFLTEST_ERROR_HASH; + + if (!crypto_cb->lrng_hash_init(shash, NULL) && + !crypto_cb->lrng_hash_update(shash, hash_input, + sizeof(hash_input)) && + !crypto_cb->lrng_hash_final(shash, digest) && + !memcmp(digest, lrng_hash_selftest_result, sizeof(digest))) + return 0; + + pr_err("LRNG %s Hash self-test FAILED\n", crypto_cb->lrng_hash_name()); + return LRNG_SEFLTEST_ERROR_HASH; +} + +/* + * The test vectors were generated using the ChaCha20 DRNG from + * https://www.chronox.de/chacha20.html + */ +static unsigned int lrng_chacha20_drng_selftest(void) +{ + const struct lrng_crypto_cb *crypto_cb = &lrng_cc20_crypto_cb; + u8 seed[CHACHA_KEY_SIZE * 2] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 
0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + }; + struct chacha20_block chacha20; + int ret; + u8 outbuf[CHACHA_KEY_SIZE * 2] __aligned(sizeof(u32)); + + /* + * Expected result when ChaCha20 DRNG state is zero: + * * constants are set to "expand 32-byte k" + * * remaining state is 0 + * and pulling one half ChaCha20 DRNG block. + */ + static const u8 expected_halfblock[CHACHA_KEY_SIZE] = { + 0x76, 0xb8, 0xe0, 0xad, 0xa0, 0xf1, 0x3d, 0x90, + 0x40, 0x5d, 0x6a, 0xe5, 0x53, 0x86, 0xbd, 0x28, + 0xbd, 0xd2, 0x19, 0xb8, 0xa0, 0x8d, 0xed, 0x1a, + 0xa8, 0x36, 0xef, 0xcc, 0x8b, 0x77, 0x0d, 0xc7 }; + + /* + * Expected result when ChaCha20 DRNG state is zero: + * * constants are set to "expand 32-byte k" + * * remaining state is 0 + * followed by a reseed with two keyblocks + * 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + * 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + * 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + * 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + * 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + * 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + * 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + * 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f + * and pulling one ChaCha20 DRNG block. + */ + static const u8 expected_oneblock[CHACHA_KEY_SIZE * 2] = { + 0xe3, 0xb0, 0x8a, 0xcc, 0x34, 0xc3, 0x17, 0x0e, + 0xc3, 0xd8, 0xc3, 0x40, 0xe7, 0x73, 0xe9, 0x0d, + 0xd1, 0x62, 0xa3, 0x5d, 0x7d, 0xf2, 0xf1, 0x4a, + 0x24, 0x42, 0xb7, 0x1e, 0xb0, 0x05, 0x17, 0x07, + 0xb9, 0x35, 0x10, 0x69, 0x8b, 0x46, 0xfb, 0x51, + 0xe9, 0x91, 0x3f, 0x46, 0xf2, 0x4d, 0xea, 0xd0, + 0x81, 0xc1, 0x1b, 0xa9, 0x5d, 0x52, 0x91, 0x5f, + 0xcd, 0xdc, 0xc6, 0xd6, 0xc3, 0x7c, 0x50, 0x23 }; + + /* + * Expected result when ChaCha20 DRNG state is zero: + * * constants are set to "expand 32-byte k" + * * remaining state is 0 + * followed by a reseed with one key block plus one byte + * 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + * 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + * 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + * 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + * 0x20 + * and pulling less than one ChaCha20 DRNG block. 
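+ * The 36-byte (CHACHA_KEY_SIZE + 4) request length also exercises the + * output path where the requested size is not a multiple of the 32-byte + * half block.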
+ */ + static const u8 expected_block_nonaligned[CHACHA_KEY_SIZE + 4] = { + 0x9c, 0xfc, 0x5e, 0x31, 0x21, 0x62, 0x11, 0x85, + 0xd3, 0x77, 0xd3, 0x69, 0x0f, 0xa8, 0x16, 0x55, + 0xb4, 0x4c, 0xf6, 0x52, 0xf3, 0xa8, 0x37, 0x99, + 0x38, 0x76, 0xa0, 0x66, 0xec, 0xbb, 0xce, 0xa9, + 0x9c, 0x95, 0xa1, 0xfd }; + + BUILD_BUG_ON(sizeof(seed) % sizeof(u32)); + + memset(&chacha20, 0, sizeof(chacha20)); + lrng_cc20_init_rfc7539(&chacha20); + lrng_selftest_bswap32((u32 *)seed, sizeof(seed) / sizeof(u32)); + + /* Generate with zero state */ + ret = crypto_cb->lrng_drng_generate_helper(&chacha20, outbuf, + sizeof(expected_halfblock)); + if (ret != sizeof(expected_halfblock)) + goto err; + if (memcmp(outbuf, expected_halfblock, sizeof(expected_halfblock))) + goto err; + + /* Clear state of DRNG */ + memset(&chacha20.key.u[0], 0, 48); + + /* Reseed with 2 key blocks */ + ret = crypto_cb->lrng_drng_seed_helper(&chacha20, seed, + sizeof(expected_oneblock)); + if (ret < 0) + goto err; + ret = crypto_cb->lrng_drng_generate_helper(&chacha20, outbuf, + sizeof(expected_oneblock)); + if (ret != sizeof(expected_oneblock)) + goto err; + if (memcmp(outbuf, expected_oneblock, sizeof(expected_oneblock))) + goto err; + + /* Clear state of DRNG */ + memset(&chacha20.key.u[0], 0, 48); + + /* Reseed with 1 key block and one byte */ + ret = crypto_cb->lrng_drng_seed_helper(&chacha20, seed, + sizeof(expected_block_nonaligned)); + if (ret < 0) + goto err; + ret = crypto_cb->lrng_drng_generate_helper(&chacha20, outbuf, + sizeof(expected_block_nonaligned)); + if (ret != sizeof(expected_block_nonaligned)) + goto err; + if (memcmp(outbuf, expected_block_nonaligned, + sizeof(expected_block_nonaligned))) + goto err; + + return LRNG_SELFTEST_PASSED; + +err: + pr_err("LRNG ChaCha20 DRNG self-test FAILED\n"); + return LRNG_SEFLTEST_ERROR_CHACHA20; +} + +static unsigned int lrng_selftest_status = LRNG_SELFTEST_NOT_EXECUTED; + +static int lrng_selftest(void) +{ + unsigned int ret = lrng_data_process_selftest(); + + ret |= lrng_chacha20_drng_selftest(); + ret |= lrng_hash_selftest(); + ret |= lrng_gcd_selftest(); + + if (ret) { + if (IS_ENABLED(CONFIG_LRNG_SELFTEST_PANIC)) + panic("LRNG self-tests failed: %u\n", ret); + } else { + pr_info("LRNG self-tests passed\n"); + } + + lrng_selftest_status = ret; + + if (lrng_selftest_status) + return -EFAULT; + return 0; +} + +#ifdef CONFIG_SYSFS +/* Re-perform self-test when any value is written to the sysfs file. 
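Reading the file afterwards returns the status bitmask; 0 means all self-tests passed.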
 */ +static int lrng_selftest_sysfs_set(const char *val, + const struct kernel_param *kp) +{ + return lrng_selftest(); +} + +static const struct kernel_param_ops lrng_selftest_sysfs = { + .set = lrng_selftest_sysfs_set, + .get = param_get_uint, +}; +module_param_cb(selftest_status, &lrng_selftest_sysfs, &lrng_selftest_status, + 0644); +#endif /* CONFIG_SYSFS */ + +static int __init lrng_selftest_init(void) +{ + return lrng_selftest(); +} + +module_init(lrng_selftest_init); diff --git a/drivers/char/lrng/lrng_switch.c b/drivers/char/lrng/lrng_switch.c new file mode 100644 index 0000000000000..5fce401499112 --- /dev/null +++ b/drivers/char/lrng/lrng_switch.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG DRNG switching support + * + * Copyright (C) 2016 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include + +#include "lrng_internal.h" + +static int lrng_drng_switch(struct lrng_drng *drng_store, + const struct lrng_crypto_cb *cb, int node) +{ + const struct lrng_crypto_cb *old_cb; + unsigned long flags = 0, flags2 = 0; + int ret; + u8 seed[LRNG_DRNG_SECURITY_STRENGTH_BYTES]; + void *new_drng = cb->lrng_drng_alloc(LRNG_DRNG_SECURITY_STRENGTH_BYTES); + void *old_drng, *new_hash, *old_hash; + u32 current_security_strength; + bool sl = false, reset_drng = !lrng_get_available(); + + if (IS_ERR(new_drng)) { + pr_warn("could not allocate new DRNG for NUMA node %d (%ld)\n", + node, PTR_ERR(new_drng)); + return PTR_ERR(new_drng); + } + + new_hash = cb->lrng_hash_alloc(); + if (IS_ERR(new_hash)) { + pr_warn("could not allocate new LRNG pool hash (%ld)\n", + PTR_ERR(new_hash)); + cb->lrng_drng_dealloc(new_drng); + return PTR_ERR(new_hash); + } + + if (cb->lrng_hash_digestsize(new_hash) > LRNG_MAX_DIGESTSIZE) { + pr_warn("digest size of newly requested hash too large\n"); + cb->lrng_hash_dealloc(new_hash); + cb->lrng_drng_dealloc(new_drng); + return -EINVAL; + } + + current_security_strength = lrng_security_strength(); + lrng_drng_lock(drng_store, &flags); + + /* + * Pull from existing DRNG to seed new DRNG regardless of seed status + * of old DRNG -- the entropy state for the DRNG is left unchanged, + * which implies that the new DRNG is also reseeded when deemed + * necessary. This seeding of the new DRNG shall only ensure that the + * new DRNG has the same entropy as the old DRNG. + */ + ret = drng_store->crypto_cb->lrng_drng_generate_helper( + drng_store->drng, seed, sizeof(seed)); + lrng_drng_unlock(drng_store, &flags); + + if (ret < 0) { + reset_drng = true; + pr_warn("getting random data from DRNG failed for NUMA node %d (%d)\n", + node, ret); + } else { + /* seed new DRNG with data */ + ret = cb->lrng_drng_seed_helper(new_drng, seed, ret); + memzero_explicit(seed, sizeof(seed)); + if (ret < 0) { + reset_drng = true; + pr_warn("seeding of new DRNG failed for NUMA node %d (%d)\n", + node, ret); + } else { + pr_debug("seeded new DRNG of NUMA node %d instance from old DRNG instance\n", + node); + } + } + + mutex_lock(&drng_store->lock); + write_lock_irqsave(&drng_store->hash_lock, flags2); + /* + * If we switch the DRNG from the initial ChaCha20 DRNG to something + * else, there is a lock transition from spin lock to mutex (see + * lrng_drng_is_atomic and how the lock is taken in lrng_drng_lock). + * Thus, we need to take both locks during the transition phase. 
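+ * The __acquire()/__release() annotations on the else path below keep + * sparse's lock-balance checking consistent when no spinlock is taken.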
+ */ + if (lrng_drng_is_atomic(drng_store)) { + spin_lock_irqsave(&drng_store->spin_lock, flags); + sl = true; + } else { + __acquire(&drng_store->spin_lock); + } + + /* Trigger the switch of the aux entropy pool for current node. */ + if (drng_store == lrng_drng_init_instance()) { + ret = lrng_aux_switch_hash(cb, new_hash, drng_store->crypto_cb); + if (ret) + goto err; + } + + /* Trigger the switch of the per-CPU entropy pools for current node. */ + ret = lrng_pcpu_switch_hash(node, cb, new_hash, drng_store->crypto_cb); + if (ret) { + /* Switch the crypto operation back to be consistent */ + WARN_ON(lrng_aux_switch_hash(drng_store->crypto_cb, + drng_store->hash, cb)); + } else { + if (reset_drng) + lrng_drng_reset(drng_store); + + old_drng = drng_store->drng; + old_cb = drng_store->crypto_cb; + drng_store->drng = new_drng; + drng_store->crypto_cb = cb; + + old_hash = drng_store->hash; + drng_store->hash = new_hash; + pr_info("Entropy pool read-hash allocated for DRNG for NUMA node %d\n", + node); + + /* Reseed if previous LRNG security strength was insufficient */ + if (current_security_strength < lrng_security_strength()) + drng_store->force_reseed = true; + + /* Force oversampling seeding as we initialize DRNG */ + if (IS_ENABLED(CONFIG_LRNG_OVERSAMPLE_ENTROPY_SOURCES)) + lrng_unset_fully_seeded(drng_store); + + if (lrng_state_min_seeded()) + lrng_set_entropy_thresh(lrng_get_seed_entropy_osr( + drng_store->fully_seeded)); + + /* ChaCha20 serves as atomic instance left untouched. */ + if (old_drng != &chacha20) { + old_cb->lrng_drng_dealloc(old_drng); + old_cb->lrng_hash_dealloc(old_hash); + } + + pr_info("DRNG of NUMA node %d switched\n", node); + } + +err: + if (sl) + spin_unlock_irqrestore(&drng_store->spin_lock, flags); + else + __release(&drng_store->spin_lock); + write_unlock_irqrestore(&drng_store->hash_lock, flags2); + mutex_unlock(&drng_store->lock); + + return ret; +} + +/* + * Switch the existing DRNG instances over to the new crypto callbacks. + * The caller must hold the lrng_crypto_cb_update lock. + */ +static int lrng_drngs_switch(const struct lrng_crypto_cb *cb) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + int ret = 0; + + /* Update DRNG */ + if (lrng_drng) { + u32 node; + + for_each_online_node(node) { + if (lrng_drng[node]) + ret = lrng_drng_switch(lrng_drng[node], cb, + node); + } + } else { + ret = lrng_drng_switch(lrng_drng_init, cb, 0); + } + + if (!ret) + lrng_set_available(); + + return ret; +} + +/* + * lrng_set_drng_cb - Register new cryptographic callback functions for DRNG + * The registering implies that all old DRNG states are replaced with new + * DRNG states. + * + * @cb: Callback functions to be registered -- if NULL, use the default + * callbacks pointing to the ChaCha20 DRNG. + * + * Return: + * * 0 on success + * * < 0 on error + */ +int lrng_set_drng_cb(const struct lrng_crypto_cb *cb) +{ + struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + int ret; + + if (!cb) + cb = &lrng_cc20_crypto_cb; + + mutex_lock(&lrng_crypto_cb_update); + + /* + * If a callback other than the default is set, allow it only to be + * set back to the default callback. This ensures that multiple + * different callbacks cannot be registered at the same time. If a + * callback different from the current callback and the default + * callback shall be set, the current callback must be deregistered + * (e.g. 
the kernel module providing it must be unloaded) and the new + * implementation can be registered. + */ + if ((cb != &lrng_cc20_crypto_cb) && + (lrng_drng_init->crypto_cb != &lrng_cc20_crypto_cb)) { + pr_warn("disallow setting new cipher callbacks, unload the old callbacks first!\n"); + ret = -EINVAL; + goto out; + } + + ret = lrng_drngs_switch(cb); + +out: + mutex_unlock(&lrng_crypto_cb_update); + return ret; +} +EXPORT_SYMBOL(lrng_set_drng_cb); diff --git a/drivers/char/lrng/lrng_testing.c b/drivers/char/lrng/lrng_testing.c new file mode 100644 index 0000000000000..3517319acb436 --- /dev/null +++ b/drivers/char/lrng/lrng_testing.c @@ -0,0 +1,689 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Linux Random Number Generator (LRNG) testing interfaces + * + * Copyright (C) 2019 - 2021, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lrng_internal.h" + +#define LRNG_TESTING_RINGBUFFER_SIZE 1024 +#define LRNG_TESTING_RINGBUFFER_MASK (LRNG_TESTING_RINGBUFFER_SIZE - 1) + +struct lrng_testing { + u32 lrng_testing_rb[LRNG_TESTING_RINGBUFFER_SIZE]; + u32 rb_reader; + u32 rb_writer; + atomic_t lrng_testing_enabled; + spinlock_t lock; + wait_queue_head_t read_wait; +}; + +/*************************** Generic Data Handling ****************************/ + +/* + * boot variable: + * 0 ==> No boot test, gathering of runtime data allowed + * 1 ==> Boot test enabled and ready for collecting data, gathering runtime + * data is disabled + * 2 ==> Boot test completed and disabled, gathering of runtime data is + * disabled + */ + +static void lrng_testing_reset(struct lrng_testing *data) +{ + unsigned long flags; + + spin_lock_irqsave(&data->lock, flags); + data->rb_reader = 0; + data->rb_writer = 0; + spin_unlock_irqrestore(&data->lock, flags); +} + +static void lrng_testing_init(struct lrng_testing *data, u32 boot) +{ + /* + * The boot time testing implies we have a running test. If the + * caller wants to clear it, he has to unset the boot_test flag + * at runtime via sysfs to enable regular runtime testing + */ + if (boot) + return; + + lrng_testing_reset(data); + atomic_set(&data->lrng_testing_enabled, 1); + pr_warn("Enabling data collection\n"); +} + +static void lrng_testing_fini(struct lrng_testing *data, u32 boot) +{ + /* If we have boot data, we do not reset yet to allow data to be read */ + if (boot) + return; + + atomic_set(&data->lrng_testing_enabled, 0); + lrng_testing_reset(data); + pr_warn("Disabling data collection\n"); +} + +static bool lrng_testing_store(struct lrng_testing *data, u32 value, + u32 *boot) +{ + unsigned long flags; + + if (!atomic_read(&data->lrng_testing_enabled) && (*boot != 1)) + return false; + + spin_lock_irqsave(&data->lock, flags); + + /* + * Disable entropy testing for boot time testing after ring buffer + * is filled. 
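+ * The boot variable then advances from 1 (armed) to 2 (completed), per + * the state machine documented at the top of this file.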
+ */ + if (*boot) { + if (data->rb_writer > LRNG_TESTING_RINGBUFFER_SIZE) { + *boot = 2; + pr_warn_once("One time data collection test disabled\n"); + spin_unlock_irqrestore(&data->lock, flags); + return false; + } + + if (data->rb_writer == 1) + pr_warn("One time data collection test enabled\n"); + } + + data->lrng_testing_rb[data->rb_writer & LRNG_TESTING_RINGBUFFER_MASK] = + value; + data->rb_writer++; + + spin_unlock_irqrestore(&data->lock, flags); + + if (wq_has_sleeper(&data->read_wait)) + wake_up_interruptible(&data->read_wait); + + return true; +} + +static bool lrng_testing_have_data(struct lrng_testing *data) +{ + return ((data->rb_writer & LRNG_TESTING_RINGBUFFER_MASK) != + (data->rb_reader & LRNG_TESTING_RINGBUFFER_MASK)); +} + +static int lrng_testing_reader(struct lrng_testing *data, u32 *boot, + u8 *outbuf, u32 outbuflen) +{ + unsigned long flags; + int collected_data = 0; + + lrng_testing_init(data, *boot); + + while (outbuflen) { + spin_lock_irqsave(&data->lock, flags); + + /* We have no data or reached the writer. */ + if (!data->rb_writer || + (data->rb_writer == data->rb_reader)) { + + spin_unlock_irqrestore(&data->lock, flags); + + /* + * Now that all boot data has been gathered, enable + * regular data collection. + */ + if (*boot) { + *boot = 0; + goto out; + } + + wait_event_interruptible(data->read_wait, + lrng_testing_have_data(data)); + if (signal_pending(current)) { + collected_data = -ERESTARTSYS; + goto out; + } + + continue; + } + + /* We copy out word-wise */ + if (outbuflen < sizeof(u32)) { + spin_unlock_irqrestore(&data->lock, flags); + goto out; + } + + memcpy(outbuf, &data->lrng_testing_rb[data->rb_reader], + sizeof(u32)); + data->rb_reader++; + + spin_unlock_irqrestore(&data->lock, flags); + + outbuf += sizeof(u32); + outbuflen -= sizeof(u32); + collected_data += sizeof(u32); + } + +out: + lrng_testing_fini(data, *boot); + return collected_data; +} + +static int lrng_testing_extract_user(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos, + int (*reader)(u8 *outbuf, u32 outbuflen)) +{ + u8 *tmp, *tmp_aligned; + int ret = 0, large_request = (nbytes > 256); + + if (!nbytes) + return 0; + + /* + * The intention of this interface is for collecting at least + * 1000 samples due to the SP800-90B requirements. So, we make no + * effort to avoid allocating more memory than actually needed + * by the user. Hence, we allocate sufficient memory to always hold + * that amount of data. 
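+ * The extra sizeof(u32) leaves room for PTR_ALIGN() below to realign + * the buffer to a u32 boundary for the word-wise reader.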
+ */ + tmp = kmalloc(LRNG_TESTING_RINGBUFFER_SIZE + sizeof(u32), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + tmp_aligned = PTR_ALIGN(tmp, sizeof(u32)); + + while (nbytes) { + int i; + + if (large_request && need_resched()) { + if (signal_pending(current)) { + if (ret == 0) + ret = -ERESTARTSYS; + break; + } + schedule(); + } + + i = min_t(int, nbytes, LRNG_TESTING_RINGBUFFER_SIZE); + i = reader(tmp_aligned, i); + if (i <= 0) { + if (i < 0) + ret = i; + break; + } + if (copy_to_user(buf, tmp_aligned, i)) { + ret = -EFAULT; + break; + } + + nbytes -= i; + buf += i; + ret += i; + } + + kfree_sensitive(tmp); + + if (ret > 0) + *ppos += ret; + + return ret; +} + +/************** Raw High-Resolution Timer Entropy Data Handling ***************/ + +#ifdef CONFIG_LRNG_RAW_HIRES_ENTROPY + +static u32 boot_raw_hires_test = 0; +module_param(boot_raw_hires_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_hires_test, "Enable gathering boot time high resolution timer entropy of the first entropy events"); + +static struct lrng_testing lrng_raw_hires = { + .rb_reader = 0, + .rb_writer = 0, + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_hires.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_hires.read_wait) +}; + +bool lrng_raw_hires_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_hires, value, &boot_raw_hires_test); +} + +static int lrng_raw_hires_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_hires, &boot_raw_hires_test, + outbuf, outbuflen); +} + +static ssize_t lrng_raw_hires_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_hires_entropy_reader); +} + +static const struct file_operations lrng_raw_hires_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_hires_read, +}; + +#endif /* CONFIG_LRNG_RAW_HIRES_ENTROPY */ + +/********************* Raw Jiffies Entropy Data Handling **********************/ + +#ifdef CONFIG_LRNG_RAW_JIFFIES_ENTROPY + +static u32 boot_raw_jiffies_test = 0; +module_param(boot_raw_jiffies_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_jiffies_test, "Enable gathering boot time jiffies entropy of the first entropy events"); + +static struct lrng_testing lrng_raw_jiffies = { + .rb_reader = 0, + .rb_writer = 0, + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_jiffies.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_jiffies.read_wait) +}; + +bool lrng_raw_jiffies_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_jiffies, value, + &boot_raw_jiffies_test); +} + +static int lrng_raw_jiffies_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_jiffies, &boot_raw_jiffies_test, + outbuf, outbuflen); +} + +static ssize_t lrng_raw_jiffies_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_jiffies_entropy_reader); +} + +static const struct file_operations lrng_raw_jiffies_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_jiffies_read, +}; + +#endif /* CONFIG_LRNG_RAW_JIFFIES_ENTROPY */ + +/************************** Raw IRQ Data Handling ****************************/ + +#ifdef CONFIG_LRNG_RAW_IRQ_ENTROPY + +static u32 boot_raw_irq_test = 0; +module_param(boot_raw_irq_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_irq_test, "Enable gathering boot time entropy of the first IRQ entropy events"); + +static struct lrng_testing lrng_raw_irq = { + .rb_reader = 0, + .rb_writer = 0, + .lock = 
__SPIN_LOCK_UNLOCKED(lrng_raw_irq.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_irq.read_wait) +}; + +bool lrng_raw_irq_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_irq, value, &boot_raw_irq_test); +} + +static int lrng_raw_irq_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_irq, &boot_raw_irq_test, outbuf, + outbuflen); +} + +static ssize_t lrng_raw_irq_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_irq_entropy_reader); +} + +static const struct file_operations lrng_raw_irq_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_irq_read, +}; + +#endif /* CONFIG_LRNG_RAW_IRQ_ENTROPY */ + +/************************ Raw IRQFLAGS Data Handling **************************/ + +#ifdef CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY + +static u32 boot_raw_irqflags_test = 0; +module_param(boot_raw_irqflags_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_irqflags_test, "Enable gathering boot time entropy of the first IRQ flags entropy events"); + +static struct lrng_testing lrng_raw_irqflags = { + .rb_reader = 0, + .rb_writer = 0, + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_irqflags.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_irqflags.read_wait) +}; + +bool lrng_raw_irqflags_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_irqflags, value, + &boot_raw_irqflags_test); +} + +static int lrng_raw_irqflags_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_irqflags, &boot_raw_irqflags_test, + outbuf, outbuflen); +} + +static ssize_t lrng_raw_irqflags_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_irqflags_entropy_reader); +} + +static const struct file_operations lrng_raw_irqflags_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_irqflags_read, +}; + +#endif /* CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY */ + +/************************ Raw _RET_IP_ Data Handling **************************/ + +#ifdef CONFIG_LRNG_RAW_RETIP_ENTROPY + +static u32 boot_raw_retip_test = 0; +module_param(boot_raw_retip_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_retip_test, "Enable gathering boot time entropy of the first return instruction pointer entropy events"); + +static struct lrng_testing lrng_raw_retip = { + .rb_reader = 0, + .rb_writer = 0, + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_retip.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_retip.read_wait) +}; + +bool lrng_raw_retip_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_retip, value, &boot_raw_retip_test); +} + +static int lrng_raw_retip_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_retip, &boot_raw_retip_test, + outbuf, outbuflen); +} + +static ssize_t lrng_raw_retip_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_retip_entropy_reader); +} + +static const struct file_operations lrng_raw_retip_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_retip_read, +}; + +#endif /* CONFIG_LRNG_RAW_RETIP_ENTROPY */ + +/********************** Raw IRQ register Data Handling ************************/ + +#ifdef CONFIG_LRNG_RAW_REGS_ENTROPY + +static u32 boot_raw_regs_test = 0; +module_param(boot_raw_regs_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_regs_test, "Enable gathering boot time entropy of the first interrupt 
register entropy events"); + +static struct lrng_testing lrng_raw_regs = { + .rb_reader = 0, + .rb_writer = 0, + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_regs.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_regs.read_wait) +}; + +bool lrng_raw_regs_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_regs, value, &boot_raw_regs_test); +} + +static int lrng_raw_regs_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_regs, &boot_raw_regs_test, + outbuf, outbuflen); +} + +static ssize_t lrng_raw_regs_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_regs_entropy_reader); +} + +static const struct file_operations lrng_raw_regs_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_regs_read, +}; + +#endif /* CONFIG_LRNG_RAW_REGS_ENTROPY */ + +/********************** Raw Entropy Array Data Handling ***********************/ + +#ifdef CONFIG_LRNG_RAW_ARRAY + +static u32 boot_raw_array = 0; +module_param(boot_raw_array, uint, 0644); +MODULE_PARM_DESC(boot_raw_array, "Enable gathering boot time raw noise array data of the first entropy events"); + +static struct lrng_testing lrng_raw_array = { + .rb_reader = 0, + .rb_writer = 0, + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_array.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_array.read_wait) +}; + +bool lrng_raw_array_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_array, value, &boot_raw_array); +} + +static int lrng_raw_array_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_array, &boot_raw_array, outbuf, + outbuflen); +} + +static ssize_t lrng_raw_array_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_array_entropy_reader); +} + +static const struct file_operations lrng_raw_array_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_array_read, +}; + +#endif /* CONFIG_LRNG_RAW_ARRAY */ + +/******************** Interrupt Performance Data Handling *********************/ + +#ifdef CONFIG_LRNG_IRQ_PERF + +static u32 boot_irq_perf = 0; +module_param(boot_irq_perf, uint, 0644); +MODULE_PARM_DESC(boot_irq_perf, "Enable gathering boot time interrupt performance data of the first entropy events"); + +static struct lrng_testing lrng_irq_perf = { + .rb_reader = 0, + .rb_writer = 0, + .lock = __SPIN_LOCK_UNLOCKED(lrng_irq_perf.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_irq_perf.read_wait) +}; + +bool lrng_perf_time(u32 start) +{ + return lrng_testing_store(&lrng_irq_perf, random_get_entropy() - start, + &boot_irq_perf); +} + +static int lrng_irq_perf_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_irq_perf, &boot_irq_perf, outbuf, + outbuflen); +} + +static ssize_t lrng_irq_perf_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_irq_perf_reader); +} + +static const struct file_operations lrng_irq_perf_fops = { + .owner = THIS_MODULE, + .read = lrng_irq_perf_read, +}; + +#endif /* CONFIG_LRNG_IRQ_PERF */ + +/*********************************** ACVT ************************************/ + +#ifdef CONFIG_LRNG_ACVT_HASH + +/* maximum amount of data to be hashed as defined by ACVP */ +#define LRNG_ACVT_MAX_SHA_MSG (65536 >> 3) + +/* + * As we use static variables to store the data, it is clear that the + * test interface is only able to handle 
single-threaded testing. This is + * considered to be sufficient for testing. If the ACVT test interface + * were used from multiple threads, the caller would get garbage, but + * kernel operation would be unaffected by this. + */ +static u8 lrng_acvt_hash_data[LRNG_ACVT_MAX_SHA_MSG] + __aligned(LRNG_KCAPI_ALIGN); +static atomic_t lrng_acvt_hash_data_size = ATOMIC_INIT(0); +static u8 lrng_acvt_hash_digest[LRNG_ATOMIC_DIGEST_SIZE]; + +static ssize_t lrng_acvt_hash_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) +{ + if (nbytes > LRNG_ACVT_MAX_SHA_MSG) + return -EINVAL; + + atomic_set(&lrng_acvt_hash_data_size, (int)nbytes); + + return simple_write_to_buffer(lrng_acvt_hash_data, + LRNG_ACVT_MAX_SHA_MSG, ppos, buf, nbytes); +} + +static ssize_t lrng_acvt_hash_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + SHASH_DESC_ON_STACK(shash, NULL); + const struct lrng_crypto_cb *crypto_cb = &lrng_cc20_crypto_cb; + ssize_t ret; + + if (count > LRNG_ATOMIC_DIGEST_SIZE) + return -EINVAL; + + ret = crypto_cb->lrng_hash_init(shash, NULL) ?: + crypto_cb->lrng_hash_update(shash, lrng_acvt_hash_data, + atomic_read_u32(&lrng_acvt_hash_data_size)) ?: + crypto_cb->lrng_hash_final(shash, lrng_acvt_hash_digest); + if (ret) + return ret; + + return simple_read_from_buffer(to, count, ppos, lrng_acvt_hash_digest, + sizeof(lrng_acvt_hash_digest)); +} + +static const struct file_operations lrng_acvt_hash_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .llseek = default_llseek, + .read = lrng_acvt_hash_read, + .write = lrng_acvt_hash_write, +}; + +#endif /* CONFIG_LRNG_ACVT_HASH */ + +/************************************************************************** + * Debugfs interface + **************************************************************************/ + +static int __init lrng_raw_init(void) +{ + struct dentry *lrng_raw_debugfs_root; + + lrng_raw_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL); + +#ifdef CONFIG_LRNG_RAW_HIRES_ENTROPY + debugfs_create_file_unsafe("lrng_raw_hires", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_hires_fops); +#endif +#ifdef CONFIG_LRNG_RAW_JIFFIES_ENTROPY + debugfs_create_file_unsafe("lrng_raw_jiffies", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_jiffies_fops); +#endif +#ifdef CONFIG_LRNG_RAW_IRQ_ENTROPY + debugfs_create_file_unsafe("lrng_raw_irq", 0400, lrng_raw_debugfs_root, + NULL, &lrng_raw_irq_fops); +#endif +#ifdef CONFIG_LRNG_RAW_IRQFLAGS_ENTROPY + debugfs_create_file_unsafe("lrng_raw_irqflags", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_irqflags_fops); +#endif +#ifdef CONFIG_LRNG_RAW_RETIP_ENTROPY + debugfs_create_file_unsafe("lrng_raw_retip", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_retip_fops); +#endif +#ifdef CONFIG_LRNG_RAW_REGS_ENTROPY + debugfs_create_file_unsafe("lrng_raw_regs", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_regs_fops); +#endif +#ifdef CONFIG_LRNG_RAW_ARRAY + debugfs_create_file_unsafe("lrng_raw_array", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_array_fops); +#endif +#ifdef CONFIG_LRNG_IRQ_PERF + debugfs_create_file_unsafe("lrng_irq_perf", 0400, lrng_raw_debugfs_root, + NULL, &lrng_irq_perf_fops); +#endif +#ifdef CONFIG_LRNG_ACVT_HASH + debugfs_create_file_unsafe("lrng_acvt_hash", 0600, + lrng_raw_debugfs_root, NULL, + &lrng_acvt_hash_fops); +#endif + + return 0; +} + +module_init(lrng_raw_init); diff --git a/drivers/char/random.c b/drivers/char/random.c index 3404a91edf292..19bf14e253f70 100644 --- a/drivers/char/random.c +++ 
b/drivers/char/random.c @@ -738,11 +738,17 @@ static void invalidate_batched_entropy(void); static void numa_crng_init(void); static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static bool trust_bootloader __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER); static int __init parse_trust_cpu(char *arg) { return kstrtobool(arg, &trust_cpu); } +static int __init parse_trust_bootloader(char *arg) +{ + return kstrtobool(arg, &trust_bootloader); +} early_param("random.trust_cpu", parse_trust_cpu); +early_param("random.trust_bootloader", parse_trust_bootloader); static bool crng_init_try_arch(struct crng_state *crng) { @@ -2229,7 +2235,7 @@ EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); */ void add_bootloader_randomness(const void *buf, unsigned int size) { - if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) + if (trust_bootloader) add_hwgenerator_randomness(buf, size, size * 8); else add_device_randomness(buf, size); diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index b009e7479b702..783d65fc71f07 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -274,14 +274,6 @@ static void tpm_dev_release(struct device *dev) kfree(chip); } -static void tpm_devs_release(struct device *dev) -{ - struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs); - - /* release the master device reference */ - put_device(&chip->dev); -} - /** * tpm_class_shutdown() - prepare the TPM device for loss of power. * @dev: device to which the chip is associated. @@ -344,7 +336,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, chip->dev_num = rc; device_initialize(&chip->dev); - device_initialize(&chip->devs); chip->dev.class = tpm_class; chip->dev.class->shutdown_pre = tpm_class_shutdown; @@ -352,29 +343,12 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, chip->dev.parent = pdev; chip->dev.groups = chip->groups; - chip->devs.parent = pdev; - chip->devs.class = tpmrm_class; - chip->devs.release = tpm_devs_release; - /* get extra reference on main device to hold on - * behalf of devs. This holds the chip structure - * while cdevs is in use. 
The corresponding put - * is in the tpm_devs_release (TPM2 only) - */ - if (chip->flags & TPM_CHIP_FLAG_TPM2) - get_device(&chip->dev); - if (chip->dev_num == 0) chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR); else chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num); - chip->devs.devt = - MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES); - rc = dev_set_name(&chip->dev, "tpm%d", chip->dev_num); - if (rc) - goto out; - rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num); if (rc) goto out; @@ -382,9 +356,7 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, chip->flags |= TPM_CHIP_FLAG_VIRTUAL; cdev_init(&chip->cdev, &tpm_fops); - cdev_init(&chip->cdevs, &tpmrm_fops); chip->cdev.owner = THIS_MODULE; - chip->cdevs.owner = THIS_MODULE; rc = tpm2_init_space(&chip->work_space, TPM2_SPACE_BUFFER_SIZE); if (rc) { @@ -396,7 +368,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, return chip; out: - put_device(&chip->devs); put_device(&chip->dev); return ERR_PTR(rc); } @@ -445,14 +416,9 @@ static int tpm_add_char_device(struct tpm_chip *chip) } if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip)) { - rc = cdev_device_add(&chip->cdevs, &chip->devs); - if (rc) { - dev_err(&chip->devs, - "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n", - dev_name(&chip->devs), MAJOR(chip->devs.devt), - MINOR(chip->devs.devt), rc); - return rc; - } + rc = tpm_devs_add(chip); + if (rc) + goto err_del_cdev; } /* Make the chip available. */ @@ -460,6 +426,10 @@ static int tpm_add_char_device(struct tpm_chip *chip) idr_replace(&dev_nums_idr, chip, chip->dev_num); mutex_unlock(&idr_lock); + return 0; + +err_del_cdev: + cdev_device_del(&chip->cdev, &chip->dev); return rc; } @@ -654,7 +624,7 @@ void tpm_chip_unregister(struct tpm_chip *chip) hwrng_unregister(&chip->hwrng); tpm_bios_log_teardown(chip); if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip)) - cdev_device_del(&chip->cdevs, &chip->devs); + tpm_devs_remove(chip); tpm_del_char_device(chip); } EXPORT_SYMBOL_GPL(tpm_chip_unregister); diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index c08cbb306636b..dc4c0a0a51290 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -69,7 +69,13 @@ static void tpm_dev_async_work(struct work_struct *work) ret = tpm_dev_transmit(priv->chip, priv->space, priv->data_buffer, sizeof(priv->data_buffer)); tpm_put_ops(priv->chip); - if (ret > 0) { + + /* + * If ret is > 0 then tpm_dev_transmit returned the size of the + * response. If ret is < 0 then tpm_dev_transmit failed and + * returned an error code. 
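+ * In either case the value is stored in response_length so that the + * result, including a failure, is passed on to a subsequent read.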
+ */ + if (ret != 0) { priv->response_length = ret; mod_timer(&priv->user_read_timer, jiffies + (120 * HZ)); } diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 283f78211c3a7..2163c6ee0d364 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -234,6 +234,8 @@ int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd, size_t cmdsiz); int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, void *buf, size_t *bufsiz); +int tpm_devs_add(struct tpm_chip *chip); +void tpm_devs_remove(struct tpm_chip *chip); void tpm_bios_log_setup(struct tpm_chip *chip); void tpm_bios_log_teardown(struct tpm_chip *chip); diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c index 97e916856cf3e..ffb35f0154c16 100644 --- a/drivers/char/tpm/tpm2-space.c +++ b/drivers/char/tpm/tpm2-space.c @@ -58,12 +58,12 @@ int tpm2_init_space(struct tpm_space *space, unsigned int buf_size) void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space) { - mutex_lock(&chip->tpm_mutex); - if (!tpm_chip_start(chip)) { + + if (tpm_try_get_ops(chip) == 0) { tpm2_flush_sessions(chip, space); - tpm_chip_stop(chip); + tpm_put_ops(chip); } - mutex_unlock(&chip->tpm_mutex); + kfree(space->context_buf); kfree(space->session_buf); } @@ -574,3 +574,68 @@ int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, dev_err(&chip->dev, "%s: error %d\n", __func__, rc); return rc; } + +/* + * Put the reference to the main device. + */ +static void tpm_devs_release(struct device *dev) +{ + struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs); + + /* release the master device reference */ + put_device(&chip->dev); +} + +/* + * Remove the device file for exposed TPM spaces and release the device + * reference. This may also release the reference to the master device. + */ +void tpm_devs_remove(struct tpm_chip *chip) +{ + cdev_device_del(&chip->cdevs, &chip->devs); + put_device(&chip->devs); +} + +/* + * Add a device file to expose TPM spaces. Also take a reference to the + * main device. + */ +int tpm_devs_add(struct tpm_chip *chip) +{ + int rc; + + device_initialize(&chip->devs); + chip->devs.parent = chip->dev.parent; + chip->devs.class = tpmrm_class; + + /* + * Get extra reference on main device to hold on behalf of devs. + * This holds the chip structure while cdevs is in use. The + * corresponding put is in the tpm_devs_release. 
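+ * The reference is taken before the release callback is installed, so + * tpm_devs_release() never puts a reference that was not acquired.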
+ */ + get_device(&chip->dev); + chip->devs.release = tpm_devs_release; + chip->devs.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES); + cdev_init(&chip->cdevs, &tpmrm_fops); + chip->cdevs.owner = THIS_MODULE; + + rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num); + if (rc) + goto err_put_devs; + + rc = cdev_device_add(&chip->cdevs, &chip->devs); + if (rc) { + dev_err(&chip->devs, + "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n", + dev_name(&chip->devs), MAJOR(chip->devs.devt), + MINOR(chip->devs.devt), rc); + goto err_put_devs; + } + + return 0; + +err_put_devs: + put_device(&chip->devs); + + return rc; +} diff --git a/drivers/clk/actions/owl-s700.c b/drivers/clk/actions/owl-s700.c index a2f34d13fb543..6ea7da1d6d755 100644 --- a/drivers/clk/actions/owl-s700.c +++ b/drivers/clk/actions/owl-s700.c @@ -162,6 +162,7 @@ static struct clk_div_table hdmia_div_table[] = { static struct clk_div_table rmii_div_table[] = { {0, 4}, {1, 10}, + {0, 0} }; /* divider clocks */ diff --git a/drivers/clk/actions/owl-s900.c b/drivers/clk/actions/owl-s900.c index 790890978424a..5144ada2c7e1a 100644 --- a/drivers/clk/actions/owl-s900.c +++ b/drivers/clk/actions/owl-s900.c @@ -140,7 +140,7 @@ static struct clk_div_table rmii_ref_div_table[] = { static struct clk_div_table usb3_mac_div_table[] = { { 1, 2 }, { 2, 3 }, { 3, 4 }, - { 0, 8 }, + { 0, 0 } }; static struct clk_div_table i2s_div_table[] = { diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c index 369dfafabbca2..060e908086a13 100644 --- a/drivers/clk/at91/sama7g5.c +++ b/drivers/clk/at91/sama7g5.c @@ -696,16 +696,16 @@ static const struct { { .n = "pdmc0_gclk", .id = 68, .r = { .max = 50000000 }, - .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, - .pp_mux_table = { 5, 8, }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, .pp_count = 2, .pp_chg_id = INT_MIN, }, { .n = "pdmc1_gclk", .id = 69, .r = { .max = 50000000, }, - .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, - .pp_mux_table = { 5, 8, }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, .pp_count = 2, .pp_chg_id = INT_MIN, }, diff --git a/drivers/clk/clk-clps711x.c b/drivers/clk/clk-clps711x.c index a2c6486ef1708..f8417ee2961aa 100644 --- a/drivers/clk/clk-clps711x.c +++ b/drivers/clk/clk-clps711x.c @@ -28,11 +28,13 @@ static const struct clk_div_table spi_div_table[] = { { .val = 1, .div = 8, }, { .val = 2, .div = 2, }, { .val = 3, .div = 1, }, + { /* sentinel */ } }; static const struct clk_div_table timer_div_table[] = { { .val = 0, .div = 256, }, { .val = 1, .div = 1, }, + { /* sentinel */ } }; struct clps711x_clk { diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8de6a22498e70..01b64b962e76f 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3456,6 +3456,19 @@ static void clk_core_reparent_orphans_nolock(void) __clk_set_parent_after(orphan, parent, NULL); __clk_recalc_accuracies(orphan); __clk_recalc_rates(orphan, 0); + + /* + * __clk_init_parent() will set the initial req_rate to + * 0 if the clock doesn't have clk_ops::recalc_rate and + * is an orphan when it's registered. + * + * 'req_rate' is used by clk_set_rate_range() and + * clk_put() to trigger a clk_set_rate() call whenever + * the boundaries are modified. Let's make sure + * 'req_rate' is set to something non-zero so that + * clk_set_rate_range() doesn't drop the frequency. 
+ */ + orphan->req_rate = orphan->rate; } } } @@ -3773,8 +3786,9 @@ struct clk *clk_hw_create_clk(struct device *dev, struct clk_hw *hw, struct clk *clk_hw_get_clk(struct clk_hw *hw, const char *con_id) { struct device *dev = hw->core->dev; + const char *name = dev ? dev_name(dev) : NULL; - return clk_hw_create_clk(dev, hw, dev_name(dev), con_id); + return clk_hw_create_clk(dev, hw, name, con_id); } EXPORT_SYMBOL(clk_hw_get_clk); diff --git a/drivers/clk/hisilicon/clk-hi3559a.c b/drivers/clk/hisilicon/clk-hi3559a.c index 56012a3d02192..9ea1a80acbe8b 100644 --- a/drivers/clk/hisilicon/clk-hi3559a.c +++ b/drivers/clk/hisilicon/clk-hi3559a.c @@ -611,8 +611,8 @@ static struct hisi_mux_clock hi3559av100_shub_mux_clks[] = { /* shub div clk */ -static struct clk_div_table shub_spi_clk_table[] = {{0, 8}, {1, 4}, {2, 2}}; -static struct clk_div_table shub_uart_div_clk_table[] = {{1, 8}, {2, 4}}; +static struct clk_div_table shub_spi_clk_table[] = {{0, 8}, {1, 4}, {2, 2}, {/*sentinel*/}}; +static struct clk_div_table shub_uart_div_clk_table[] = {{1, 8}, {2, 4}, {/*sentinel*/}}; static struct hisi_divider_clock hi3559av100_shub_div_clks[] = { { HI3559AV100_SHUB_SPI_SOURCE_CLK, "clk_spi_clk", "shub_clk", 0, 0x20, 24, 2, diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c index c4e0f1c07192f..3f6fd7ef2a68f 100644 --- a/drivers/clk/imx/clk-imx7d.c +++ b/drivers/clk/imx/clk-imx7d.c @@ -849,7 +849,6 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node) hws[IMX7D_WDOG4_ROOT_CLK] = imx_clk_hw_gate4("wdog4_root_clk", "wdog_post_div", base + 0x49f0, 0); hws[IMX7D_KPP_ROOT_CLK] = imx_clk_hw_gate4("kpp_root_clk", "ipg_root_clk", base + 0x4aa0, 0); hws[IMX7D_CSI_MCLK_ROOT_CLK] = imx_clk_hw_gate4("csi_mclk_root_clk", "csi_mclk_post_div", base + 0x4490, 0); - hws[IMX7D_AUDIO_MCLK_ROOT_CLK] = imx_clk_hw_gate4("audio_mclk_root_clk", "audio_mclk_post_div", base + 0x4790, 0); hws[IMX7D_WRCLK_ROOT_CLK] = imx_clk_hw_gate4("wrclk_root_clk", "wrclk_post_div", base + 0x47a0, 0); hws[IMX7D_USB_CTRL_CLK] = imx_clk_hw_gate4("usb_ctrl_clk", "ahb_root_clk", base + 0x4680, 0); hws[IMX7D_USB_PHY1_CLK] = imx_clk_hw_gate4("usb_phy1_clk", "pll_usb1_main_clk", base + 0x46a0, 0); diff --git a/drivers/clk/imx/clk-imx8qxp-lpcg.c b/drivers/clk/imx/clk-imx8qxp-lpcg.c index b23758083ce52..5e31a6a24b3a3 100644 --- a/drivers/clk/imx/clk-imx8qxp-lpcg.c +++ b/drivers/clk/imx/clk-imx8qxp-lpcg.c @@ -248,7 +248,7 @@ static int imx_lpcg_parse_clks_from_dt(struct platform_device *pdev, for (i = 0; i < count; i++) { idx = bit_offset[i] / 4; - if (idx > IMX_LPCG_MAX_CLKS) { + if (idx >= IMX_LPCG_MAX_CLKS) { dev_warn(&pdev->dev, "invalid bit offset of clock %d\n", i); ret = -EINVAL; diff --git a/drivers/clk/loongson1/clk-loongson1c.c b/drivers/clk/loongson1/clk-loongson1c.c index 703f87622cf5f..1ebf740380efb 100644 --- a/drivers/clk/loongson1/clk-loongson1c.c +++ b/drivers/clk/loongson1/clk-loongson1c.c @@ -37,6 +37,7 @@ static const struct clk_div_table ahb_div_table[] = { [1] = { .val = 1, .div = 4 }, [2] = { .val = 2, .div = 3 }, [3] = { .val = 3, .div = 3 }, + [4] = { /* sentinel */ } }; void __init ls1x_clk_init(void) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index e1b1b426fae4b..f675fd969c4de 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -264,7 +264,7 @@ static int clk_rcg2_determine_floor_rate(struct clk_hw *hw, static int __clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f) { - u32 cfg, mask; + u32 cfg, mask, d_val, not2d_val, 
n_minus_m; struct clk_hw *hw = &rcg->clkr.hw; int ret, index = qcom_find_src_index(hw, rcg->parent_map, f->src); @@ -283,8 +283,17 @@ static int __clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f) if (ret) return ret; + /* Calculate 2d value */ + d_val = f->n; + + n_minus_m = f->n - f->m; + n_minus_m *= 2; + + d_val = clamp_t(u32, d_val, f->m, n_minus_m); + not2d_val = ~d_val & mask; + ret = regmap_update_bits(rcg->clkr.regmap, - RCG_D_OFFSET(rcg), mask, ~f->n); + RCG_D_OFFSET(rcg), mask, not2d_val); if (ret) return ret; } @@ -720,6 +729,7 @@ static const struct frac_entry frac_table_pixel[] = { { 2, 9 }, { 4, 9 }, { 1, 1 }, + { 2, 3 }, { } }; diff --git a/drivers/clk/qcom/gcc-ipq8074.c b/drivers/clk/qcom/gcc-ipq8074.c index 108fe27bee10f..541016db3c4bb 100644 --- a/drivers/clk/qcom/gcc-ipq8074.c +++ b/drivers/clk/qcom/gcc-ipq8074.c @@ -60,11 +60,6 @@ static const struct parent_map gcc_xo_gpll0_gpll0_out_main_div2_map[] = { { P_GPLL0_DIV2, 4 }, }; -static const char * const gcc_xo_gpll0[] = { - "xo", - "gpll0", -}; - static const struct parent_map gcc_xo_gpll0_map[] = { { P_XO, 0 }, { P_GPLL0, 1 }, @@ -956,6 +951,11 @@ static struct clk_rcg2 blsp1_uart6_apps_clk_src = { }, }; +static const struct clk_parent_data gcc_xo_gpll0[] = { + { .fw_name = "xo" }, + { .hw = &gpll0.clkr.hw }, +}; + static const struct freq_tbl ftbl_pcie_axi_clk_src[] = { F(19200000, P_XO, 1, 0, 0), F(200000000, P_GPLL0, 4, 0, 0), @@ -969,7 +969,7 @@ static struct clk_rcg2 pcie0_axi_clk_src = { .parent_map = gcc_xo_gpll0_map, .clkr.hw.init = &(struct clk_init_data){ .name = "pcie0_axi_clk_src", - .parent_names = gcc_xo_gpll0, + .parent_data = gcc_xo_gpll0, .num_parents = 2, .ops = &clk_rcg2_ops, }, @@ -1016,7 +1016,7 @@ static struct clk_rcg2 pcie1_axi_clk_src = { .parent_map = gcc_xo_gpll0_map, .clkr.hw.init = &(struct clk_init_data){ .name = "pcie1_axi_clk_src", - .parent_names = gcc_xo_gpll0, + .parent_data = gcc_xo_gpll0, .num_parents = 2, .ops = &clk_rcg2_ops, }, @@ -1074,7 +1074,7 @@ static struct clk_rcg2 sdcc1_apps_clk_src = { .name = "sdcc1_apps_clk_src", .parent_names = gcc_xo_gpll0_gpll2_gpll0_out_main_div2, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; @@ -1330,7 +1330,7 @@ static struct clk_rcg2 nss_ce_clk_src = { .parent_map = gcc_xo_gpll0_map, .clkr.hw.init = &(struct clk_init_data){ .name = "nss_ce_clk_src", - .parent_names = gcc_xo_gpll0, + .parent_data = gcc_xo_gpll0, .num_parents = 2, .ops = &clk_rcg2_ops, }, @@ -4329,8 +4329,7 @@ static struct clk_rcg2 pcie0_rchng_clk_src = { .parent_map = gcc_xo_gpll0_map, .clkr.hw.init = &(struct clk_init_data){ .name = "pcie0_rchng_clk_src", - .parent_hws = (const struct clk_hw *[]) { - &gpll0.clkr.hw }, + .parent_data = gcc_xo_gpll0, .num_parents = 2, .ops = &clk_rcg2_ops, }, diff --git a/drivers/clk/qcom/gcc-msm8994.c b/drivers/clk/qcom/gcc-msm8994.c index f09499999eb3a..6b702cdacbf2e 100644 --- a/drivers/clk/qcom/gcc-msm8994.c +++ b/drivers/clk/qcom/gcc-msm8994.c @@ -77,6 +77,7 @@ static struct clk_alpha_pll gpll4_early = { static struct clk_alpha_pll_postdiv gpll4 = { .offset = 0x1dc0, + .width = 4, .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll4", diff --git a/drivers/clk/renesas/r8a779f0-cpg-mssr.c b/drivers/clk/renesas/r8a779f0-cpg-mssr.c index e6ec02c2c2a8b..344957d533d81 100644 --- a/drivers/clk/renesas/r8a779f0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779f0-cpg-mssr.c @@ -103,7 +103,7 @@ static const struct cpg_core_clk r8a779f0_core_clks[] 
__initconst = { DEF_FIXED("s0d12_hsc", R8A779F0_CLK_S0D12_HSC, CLK_S0, 12, 1), DEF_FIXED("cl16m_hsc", R8A779F0_CLK_CL16M_HSC, CLK_S0, 48, 1), DEF_FIXED("s0d2_cc", R8A779F0_CLK_S0D2_CC, CLK_S0, 2, 1), - DEF_FIXED("rsw2", R8A779F0_CLK_RSW2, CLK_PLL5, 2, 1), + DEF_FIXED("rsw2", R8A779F0_CLK_RSW2, CLK_PLL5_DIV2, 5, 1), DEF_FIXED("cbfusa", R8A779F0_CLK_CBFUSA, CLK_EXTAL, 2, 1), DEF_FIXED("cpex", R8A779F0_CLK_CPEX, CLK_EXTAL, 2, 1), diff --git a/drivers/clk/renesas/r9a07g044-cpg.c b/drivers/clk/renesas/r9a07g044-cpg.c index 79042bf46fe85..46359afef0d43 100644 --- a/drivers/clk/renesas/r9a07g044-cpg.c +++ b/drivers/clk/renesas/r9a07g044-cpg.c @@ -88,8 +88,8 @@ static const struct cpg_core_clk r9a07g044_core_clks[] __initconst = { DEF_FIXED(".osc", R9A07G044_OSCCLK, CLK_EXTAL, 1, 1), DEF_FIXED(".osc_div1000", CLK_OSC_DIV1000, CLK_EXTAL, 1, 1000), DEF_SAMPLL(".pll1", CLK_PLL1, CLK_EXTAL, PLL146_CONF(0)), - DEF_FIXED(".pll2", CLK_PLL2, CLK_EXTAL, 133, 2), - DEF_FIXED(".pll3", CLK_PLL3, CLK_EXTAL, 133, 2), + DEF_FIXED(".pll2", CLK_PLL2, CLK_EXTAL, 200, 3), + DEF_FIXED(".pll3", CLK_PLL3, CLK_EXTAL, 200, 3), DEF_FIXED(".pll3_400", CLK_PLL3_400, CLK_PLL3, 1, 4), DEF_FIXED(".pll3_533", CLK_PLL3_533, CLK_PLL3, 1, 3), diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c index b7be7e11b0dfe..bb8a844309bf5 100644 --- a/drivers/clk/rockchip/clk.c +++ b/drivers/clk/rockchip/clk.c @@ -180,6 +180,7 @@ static void rockchip_fractional_approximation(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate, unsigned long *m, unsigned long *n) { + struct clk_fractional_divider *fd = to_clk_fd(hw); unsigned long p_rate, p_parent_rate; struct clk_hw *p_parent; @@ -190,6 +191,8 @@ static void rockchip_fractional_approximation(struct clk_hw *hw, *parent_rate = p_parent_rate; } + fd->flags |= CLK_FRAC_DIVIDER_POWER_OF_TWO_PS; + clk_fractional_divider_general_approximation(hw, rate, parent_rate, m, n); } diff --git a/drivers/clk/starfive/clk-starfive-jh7100.c b/drivers/clk/starfive/clk-starfive-jh7100.c index 25d31afa0f871..4b59338b5d7d4 100644 --- a/drivers/clk/starfive/clk-starfive-jh7100.c +++ b/drivers/clk/starfive/clk-starfive-jh7100.c @@ -32,6 +32,13 @@ #define JH7100_CLK_MUX_MASK GENMASK(27, 24) #define JH7100_CLK_MUX_SHIFT 24 #define JH7100_CLK_DIV_MASK GENMASK(23, 0) +#define JH7100_CLK_FRAC_MASK GENMASK(15, 8) +#define JH7100_CLK_FRAC_SHIFT 8 +#define JH7100_CLK_INT_MASK GENMASK(7, 0) + +/* fractional divider min/max */ +#define JH7100_CLK_FRAC_MIN 100UL +#define JH7100_CLK_FRAC_MAX 25599UL /* clock data */ #define JH7100_GATE(_idx, _name, _flags, _parent) [_idx] = { \ @@ -55,6 +62,13 @@ .parents = { [0] = _parent }, \ } +#define JH7100_FDIV(_idx, _name, _parent) [_idx] = { \ + .name = _name, \ + .flags = 0, \ + .max = JH7100_CLK_FRAC_MAX, \ + .parents = { [0] = _parent }, \ +} + #define JH7100__MUX(_idx, _name, _nparents, ...) [_idx] = { \ .name = _name, \ .flags = 0, \ @@ -225,7 +239,7 @@ static const struct { JH7100__MUX(JH7100_CLK_USBPHY_25M, "usbphy_25m", 2, JH7100_CLK_OSC_SYS, JH7100_CLK_USBPHY_PLLDIV25M), - JH7100__DIV(JH7100_CLK_AUDIO_DIV, "audio_div", 131072, JH7100_CLK_AUDIO_ROOT), + JH7100_FDIV(JH7100_CLK_AUDIO_DIV, "audio_div", JH7100_CLK_AUDIO_ROOT), JH7100_GATE(JH7100_CLK_AUDIO_SRC, "audio_src", 0, JH7100_CLK_AUDIO_DIV), JH7100_GATE(JH7100_CLK_AUDIO_12288, "audio_12288", 0, JH7100_CLK_OSC_AUD), JH7100_GDIV(JH7100_CLK_VIN_SRC, "vin_src", 0, 4, JH7100_CLK_VIN_ROOT), @@ -399,22 +413,13 @@ static unsigned long jh7100_clk_recalc_rate(struct clk_hw *hw, return div ? 
parent_rate / div : 0; } -static unsigned long jh7100_clk_bestdiv(struct jh7100_clk *clk, - unsigned long rate, unsigned long parent) -{ - unsigned long max = clk->max_div; - unsigned long div = DIV_ROUND_UP(parent, rate); - - return min(div, max); -} - static int jh7100_clk_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { struct jh7100_clk *clk = jh7100_clk_from(hw); unsigned long parent = req->best_parent_rate; unsigned long rate = clamp(req->rate, req->min_rate, req->max_rate); - unsigned long div = jh7100_clk_bestdiv(clk, rate, parent); + unsigned long div = min_t(unsigned long, DIV_ROUND_UP(parent, rate), clk->max_div); unsigned long result = parent / div; /* @@ -442,12 +447,56 @@ static int jh7100_clk_set_rate(struct clk_hw *hw, unsigned long parent_rate) { struct jh7100_clk *clk = jh7100_clk_from(hw); - unsigned long div = jh7100_clk_bestdiv(clk, rate, parent_rate); + unsigned long div = clamp(DIV_ROUND_CLOSEST(parent_rate, rate), + 1UL, (unsigned long)clk->max_div); jh7100_clk_reg_rmw(clk, JH7100_CLK_DIV_MASK, div); return 0; } +static unsigned long jh7100_clk_frac_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct jh7100_clk *clk = jh7100_clk_from(hw); + u32 reg = jh7100_clk_reg_get(clk); + unsigned long div100 = 100 * (reg & JH7100_CLK_INT_MASK) + + ((reg & JH7100_CLK_FRAC_MASK) >> JH7100_CLK_FRAC_SHIFT); + + return (div100 >= JH7100_CLK_FRAC_MIN) ? 100 * parent_rate / div100 : 0; +} + +static int jh7100_clk_frac_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + unsigned long parent100 = 100 * req->best_parent_rate; + unsigned long rate = clamp(req->rate, req->min_rate, req->max_rate); + unsigned long div100 = clamp(DIV_ROUND_CLOSEST(parent100, rate), + JH7100_CLK_FRAC_MIN, JH7100_CLK_FRAC_MAX); + unsigned long result = parent100 / div100; + + /* clamp the result as in jh7100_clk_determine_rate() above */ + if (result > req->max_rate && div100 < JH7100_CLK_FRAC_MAX) + result = parent100 / (div100 + 1); + if (result < req->min_rate && div100 > JH7100_CLK_FRAC_MIN) + result = parent100 / (div100 - 1); + + req->rate = result; + return 0; +} + +static int jh7100_clk_frac_set_rate(struct clk_hw *hw, + unsigned long rate, + unsigned long parent_rate) +{ + struct jh7100_clk *clk = jh7100_clk_from(hw); + unsigned long div100 = clamp(DIV_ROUND_CLOSEST(100 * parent_rate, rate), + JH7100_CLK_FRAC_MIN, JH7100_CLK_FRAC_MAX); + u32 value = ((div100 % 100) << JH7100_CLK_FRAC_SHIFT) | (div100 / 100); + + jh7100_clk_reg_rmw(clk, JH7100_CLK_DIV_MASK, value); + return 0; +} + static u8 jh7100_clk_get_parent(struct clk_hw *hw) { struct jh7100_clk *clk = jh7100_clk_from(hw); @@ -534,6 +583,13 @@ static const struct clk_ops jh7100_clk_div_ops = { .debug_init = jh7100_clk_debug_init, }; +static const struct clk_ops jh7100_clk_fdiv_ops = { + .recalc_rate = jh7100_clk_frac_recalc_rate, + .determine_rate = jh7100_clk_frac_determine_rate, + .set_rate = jh7100_clk_frac_set_rate, + .debug_init = jh7100_clk_debug_init, +}; + static const struct clk_ops jh7100_clk_gdiv_ops = { .enable = jh7100_clk_enable, .disable = jh7100_clk_disable, @@ -572,6 +628,8 @@ static const struct clk_ops *__init jh7100_clk_ops(u32 max) if (max & JH7100_CLK_DIV_MASK) { if (max & JH7100_CLK_ENABLE) return &jh7100_clk_gdiv_ops; + if (max == JH7100_CLK_FRAC_MAX) + return &jh7100_clk_fdiv_ops; return &jh7100_clk_div_ops; } diff --git a/drivers/clk/tegra/clk-tegra124-emc.c b/drivers/clk/tegra/clk-tegra124-emc.c index 74c1d894cca86..219c80653dbdb 100644 --- 
a/drivers/clk/tegra/clk-tegra124-emc.c +++ b/drivers/clk/tegra/clk-tegra124-emc.c @@ -198,6 +198,7 @@ static struct tegra_emc *emc_ensure_emc_driver(struct tegra_clk_emc *tegra) tegra->emc = platform_get_drvdata(pdev); if (!tegra->emc) { + put_device(&pdev->dev); pr_err("%s: cannot find EMC driver\n", __func__); return NULL; } diff --git a/drivers/clk/uniphier/clk-uniphier-fixed-rate.c b/drivers/clk/uniphier/clk-uniphier-fixed-rate.c index 5319cd3804801..3bc55ab75314b 100644 --- a/drivers/clk/uniphier/clk-uniphier-fixed-rate.c +++ b/drivers/clk/uniphier/clk-uniphier-fixed-rate.c @@ -24,6 +24,7 @@ struct clk_hw *uniphier_clk_register_fixed_rate(struct device *dev, init.name = name; init.ops = &clk_fixed_rate_ops; + init.flags = 0; init.parent_names = NULL; init.num_parents = 0; diff --git a/drivers/clk/visconti/clkc-tmpv770x.c b/drivers/clk/visconti/clkc-tmpv770x.c index c2b2f41a85a45..6c753b2cb558f 100644 --- a/drivers/clk/visconti/clkc-tmpv770x.c +++ b/drivers/clk/visconti/clkc-tmpv770x.c @@ -176,7 +176,7 @@ static const struct visconti_clk_gate_table clk_gate_tables[] = { { TMPV770X_CLK_WRCK, "wrck", clks_parent_data, ARRAY_SIZE(clks_parent_data), 0, 0x68, 0x168, 9, 32, - -1, }, /* No reset */ + NO_RESET, }, { TMPV770X_CLK_PICKMON, "pickmon", clks_parent_data, ARRAY_SIZE(clks_parent_data), 0, 0x10, 0x110, 8, 4, diff --git a/drivers/clk/visconti/clkc.c b/drivers/clk/visconti/clkc.c index 56a8a4ffebca8..d0b193b5d0b35 100644 --- a/drivers/clk/visconti/clkc.c +++ b/drivers/clk/visconti/clkc.c @@ -147,7 +147,7 @@ int visconti_clk_register_gates(struct visconti_clk_provider *ctx, if (!dev_name) return -ENOMEM; - if (clks[i].rs_id >= 0) { + if (clks[i].rs_id != NO_RESET) { rson_offset = reset[clks[i].rs_id].rson_offset; rsoff_offset = reset[clks[i].rs_id].rsoff_offset; rs_idx = reset[clks[i].rs_id].rs_idx; diff --git a/drivers/clk/visconti/clkc.h b/drivers/clk/visconti/clkc.h index 09ed82ff64e45..8756a1ec42efc 100644 --- a/drivers/clk/visconti/clkc.h +++ b/drivers/clk/visconti/clkc.h @@ -73,4 +73,7 @@ int visconti_clk_register_gates(struct visconti_clk_provider *data, int num_gate, const struct visconti_reset_data *reset, spinlock_t *lock); + +#define NO_RESET 0xFF + #endif /* _VISCONTI_CLKC_H_ */ diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index eb596ff9e7bb3..279ddff81ab49 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -229,8 +229,10 @@ static int __init parse_pmtmr(char *arg) int ret; ret = kstrtouint(arg, 16, &base); - if (ret) - return ret; + if (ret) { + pr_warn("PMTMR: invalid 'pmtmr=' value: '%s'\n", arg); + return 1; + } pr_info("PMTMR IOPort override: 0x%04x -> 0x%04x\n", pmtmr_ioport, base); diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 6db3d5511b0ff..03782b399ea1a 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -541,6 +541,11 @@ static int __init exynos4_timer_interrupts(struct device_node *np, * irqs are specified. 
*/ nr_irqs = of_irq_count(np); + if (nr_irqs > ARRAY_SIZE(mct_irqs)) { + pr_err("exynos-mct: too many (%d) interrupts configured in DT\n", + nr_irqs); + nr_irqs = ARRAY_SIZE(mct_irqs); + } for (i = MCT_L0_IRQ; i < nr_irqs; i++) mct_irqs[i] = irq_of_parse_and_map(np, i); @@ -553,11 +558,14 @@ static int __init exynos4_timer_interrupts(struct device_node *np, mct_irqs[MCT_L0_IRQ], err); } else { for_each_possible_cpu(cpu) { - int mct_irq = mct_irqs[MCT_L0_IRQ + cpu]; + int mct_irq; struct mct_clock_event_device *pcpu_mevt = per_cpu_ptr(&percpu_mct_tick, cpu); pcpu_mevt->evt.irq = -1; + if (MCT_L0_IRQ + cpu >= ARRAY_SIZE(mct_irqs)) + break; + mct_irq = mct_irqs[MCT_L0_IRQ + cpu]; irq_set_status_flags(mct_irq, IRQ_NOAUTOEN); if (request_irq(mct_irq, diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c index cfa4ec7ef3968..790d2c9b42a70 100644 --- a/drivers/clocksource/timer-microchip-pit64b.c +++ b/drivers/clocksource/timer-microchip-pit64b.c @@ -165,7 +165,7 @@ static u64 mchp_pit64b_clksrc_read(struct clocksource *cs) return mchp_pit64b_cnt_read(mchp_pit64b_cs_base); } -static u64 mchp_pit64b_sched_read_clk(void) +static u64 notrace mchp_pit64b_sched_read_clk(void) { return mchp_pit64b_cnt_read(mchp_pit64b_cs_base); } diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index 529cc6a51cdb3..c3f54d9912be7 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -157,9 +157,9 @@ static __init int timer_of_base_init(struct device_node *np, of_base->base = of_base->name ? of_io_request_and_map(np, of_base->index, of_base->name) : of_iomap(np, of_base->index); - if (IS_ERR(of_base->base)) { - pr_err("Failed to iomap (%s)\n", of_base->name); - return PTR_ERR(of_base->base); + if (IS_ERR_OR_NULL(of_base->base)) { + pr_err("Failed to iomap (%s:%s)\n", np->name, of_base->name); + return of_base->base ? 
PTR_ERR(of_base->base) : -ENOMEM; } return 0; diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c index 1fccb457fcc54..2737407ff0698 100644 --- a/drivers/clocksource/timer-ti-dm-systimer.c +++ b/drivers/clocksource/timer-ti-dm-systimer.c @@ -694,9 +694,9 @@ static int __init dmtimer_percpu_quirk_init(struct device_node *np, u32 pa) return 0; } - if (pa == 0x48034000) /* dra7 dmtimer3 */ + if (pa == 0x4882c000) /* dra7 dmtimer15 */ return dmtimer_percpu_timer_init(np, 0); - else if (pa == 0x48036000) /* dra7 dmtimer4 */ + else if (pa == 0x4882e000) /* dra7 dmtimer16 */ return dmtimer_percpu_timer_init(np, 1); return 0; diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index d1744b5d96190..6dfa86971a757 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -130,7 +130,7 @@ static void get_krait_bin_format_b(struct device *cpu_dev, } /* Check PVS_BLOW_STATUS */ - pte_efuse = *(((u32 *)buf) + 4); + pte_efuse = *(((u32 *)buf) + 1); pte_efuse &= BIT(21); if (pte_efuse) { dev_dbg(cpu_dev, "PVS bin: %d\n", *pvs); diff --git a/drivers/cpuidle/cpuidle-qcom-spm.c b/drivers/cpuidle/cpuidle-qcom-spm.c index 01e77913a4144..5f27dcc6c110f 100644 --- a/drivers/cpuidle/cpuidle-qcom-spm.c +++ b/drivers/cpuidle/cpuidle-qcom-spm.c @@ -155,6 +155,22 @@ static struct platform_driver spm_cpuidle_driver = { }, }; +static bool __init qcom_spm_find_any_cpu(void) +{ + struct device_node *cpu_node, *saw_node; + + for_each_of_cpu_node(cpu_node) { + saw_node = of_parse_phandle(cpu_node, "qcom,saw", 0); + if (of_device_is_available(saw_node)) { + of_node_put(saw_node); + of_node_put(cpu_node); + return true; + } + of_node_put(saw_node); + } + return false; +} + static int __init qcom_spm_cpuidle_init(void) { struct platform_device *pdev; @@ -164,6 +180,10 @@ static int __init qcom_spm_cpuidle_init(void) if (ret) return ret; + /* Make sure there is actually any CPU managed by the SPM */ + if (!qcom_spm_find_any_cpu()) + return 0; + pdev = platform_device_register_simple("qcom-spm-cpuidle", -1, NULL, 0); if (IS_ERR(pdev)) { diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 54ae8d16e4931..35e3cadccac2b 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -11,6 +11,7 @@ * You could find a link for the datasheet in Documentation/arm/sunxi.rst */ +#include #include #include #include @@ -283,7 +284,9 @@ static int sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq) flow = rctx->flow; err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm)); + local_bh_disable(); crypto_finalize_skcipher_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index 88194718a806c..859b7522faaac 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -9,6 +9,7 @@ * * You could find the datasheet in Documentation/arm/sunxi.rst */ +#include #include #include #include @@ -414,6 +415,8 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) theend: kfree(buf); kfree(result); + local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c 
b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 9ef1c85c4aaa5..554e400d41cad 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -11,6 +11,7 @@ * You could find a link for the datasheet in Documentation/arm/sunxi.rst */ +#include #include #include #include @@ -274,7 +275,9 @@ static int sun8i_ss_handle_cipher_request(struct crypto_engine *engine, void *ar struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); err = sun8i_ss_cipher(breq); + local_bh_disable(); crypto_finalize_skcipher_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 80e89066dbd1a..319fe3279a716 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -30,6 +30,8 @@ static const struct ss_variant ss_a80_variant = { .alg_cipher = { SS_ALG_AES, SS_ALG_DES, SS_ALG_3DES, }, + .alg_hash = { SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP, SS_ID_NOTSUPP, + }, .op_mode = { SS_OP_ECB, SS_OP_CBC, }, .ss_clks = { diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index 3c073eb3db038..1a71ed49d2333 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -9,6 +9,7 @@ * * You could find the datasheet in Documentation/arm/sunxi.rst */ +#include #include #include #include @@ -442,6 +443,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) theend: kfree(pad); kfree(result); + local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/amlogic/amlogic-gxl-cipher.c b/drivers/crypto/amlogic/amlogic-gxl-cipher.c index c6865cbd334b2..e79514fce731f 100644 --- a/drivers/crypto/amlogic/amlogic-gxl-cipher.c +++ b/drivers/crypto/amlogic/amlogic-gxl-cipher.c @@ -265,7 +265,9 @@ static int meson_handle_cipher_request(struct crypto_engine *engine, struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); err = meson_cipher(breq); + local_bh_disable(); crypto_finalize_skcipher_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index d718db224be42..7d4b4ad1db1f3 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -632,6 +632,20 @@ static int ccp_terminate_all(struct dma_chan *dma_chan) return 0; } +static void ccp_dma_release(struct ccp_device *ccp) +{ + struct ccp_dma_chan *chan; + struct dma_chan *dma_chan; + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) { + chan = ccp->ccp_dma_chan + i; + dma_chan = &chan->dma_chan; + tasklet_kill(&chan->cleanup_tasklet); + list_del_rcu(&dma_chan->device_node); + } +} + int ccp_dmaengine_register(struct ccp_device *ccp) { struct ccp_dma_chan *chan; @@ -736,6 +750,7 @@ int ccp_dmaengine_register(struct ccp_device *ccp) return 0; err_reg: + ccp_dma_release(ccp); kmem_cache_destroy(ccp->dma_desc_cache); err_cache: @@ -752,6 +767,7 @@ void ccp_dmaengine_unregister(struct ccp_device *ccp) return; dma_async_device_unregister(dma_dev); + ccp_dma_release(ccp); kmem_cache_destroy(ccp->dma_desc_cache); kmem_cache_destroy(ccp->dma_cmd_cache); diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 
8fd774a10edc3..6ab93dfd478a9 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -413,7 +413,7 @@ static int __sev_platform_init_locked(int *error) { struct psp_device *psp = psp_master; struct sev_device *sev; - int rc, psp_ret; + int rc, psp_ret = -1; int (*init_function)(int *error); if (!psp || !psp->sev_data) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index a5e041d9d2cf1..11e0278c8631d 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -258,6 +258,13 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg, { int ret = 0; + if (!nbytes) { + *mapped_nents = 0; + *lbytes = 0; + *nents = 0; + return 0; + } + *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); if (*nents > max_sg_nents) { *nents = 0; diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index 78833491f534d..309da6334a0a0 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -257,8 +257,8 @@ static void cc_cipher_exit(struct crypto_tfm *tfm) &ctx_p->user.key_dma_addr); /* Free key buffer in context */ - kfree_sensitive(ctx_p->user.key); dev_dbg(dev, "Free key buffer in context. key=@%p\n", ctx_p->user.key); + kfree_sensitive(ctx_p->user.key); } struct tdes_keys { diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c index c1c2b1d866639..f2be0a7d7f7ac 100644 --- a/drivers/crypto/gemini/sl3516-ce-cipher.c +++ b/drivers/crypto/gemini/sl3516-ce-cipher.c @@ -264,7 +264,9 @@ static int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *a struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); err = sl3516_ce_cipher(breq); + local_bh_disable(); crypto_finalize_skcipher_request(engine, breq, err); + local_bh_enable(); return 0; } diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index c5b84a5ea3501..3b29c8993b8c7 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4295,7 +4295,7 @@ static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num) static int qm_vf_read_qos(struct hisi_qm *qm) { int cnt = 0; - int ret; + int ret = -EINVAL; /* reset mailbox qos val */ qm->mb_qos = 0; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 6a45bd23b3635..090920ed50c8f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -2284,9 +2284,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx, struct aead_request *aead_req, bool encrypt) { - struct aead_request *subreq = aead_request_ctx(aead_req); struct sec_auth_ctx *a_ctx = &ctx->a_ctx; struct device *dev = ctx->dev; + struct aead_request *subreq; + int ret; /* Kunpeng920 aead mode not support input 0 size */ if (!a_ctx->fallback_aead_tfm) { @@ -2294,6 +2295,10 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx, return -EINVAL; } + subreq = aead_request_alloc(a_ctx->fallback_aead_tfm, GFP_KERNEL); + if (!subreq) + return -ENOMEM; + aead_request_set_tfm(subreq, a_ctx->fallback_aead_tfm); aead_request_set_callback(subreq, aead_req->base.flags, aead_req->base.complete, aead_req->base.data); @@ -2301,8 +2306,13 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx, aead_req->cryptlen, aead_req->iv); aead_request_set_ad(subreq, aead_req->assoclen); - return encrypt ? 
crypto_aead_encrypt(subreq) : - crypto_aead_decrypt(subreq); + if (encrypt) + ret = crypto_aead_encrypt(subreq); + else + ret = crypto_aead_decrypt(subreq); + aead_request_free(subreq); + + return ret; } static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 26d3ab1d308ba..89d4cc767d361 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -443,9 +443,11 @@ static int sec_engine_init(struct hisi_qm *qm) writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG); - /* Enable sm4 extra mode, as ctr/ecb */ - writel_relaxed(SEC_BD_ERR_CHK_EN0, - qm->io_base + SEC_BD_ERR_CHK_EN_REG0); + /* On HW V2, enable sm4 extra mode (ctr/ecb) */ + if (qm->ver < QM_HW_V3) + writel_relaxed(SEC_BD_ERR_CHK_EN0, + qm->io_base + SEC_BD_ERR_CHK_EN_REG0); + /* Enable sm4 xts mode multiple iv */ writel_relaxed(SEC_BD_ERR_CHK_EN1, qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig index 9125199f1702b..a48591af12d02 100644 --- a/drivers/crypto/marvell/Kconfig +++ b/drivers/crypto/marvell/Kconfig @@ -47,6 +47,7 @@ config CRYPTO_DEV_OCTEONTX2_CPT select CRYPTO_SKCIPHER select CRYPTO_HASH select CRYPTO_AEAD + select NET_DEVLINK help This driver allows you to utilize the Marvell Cryptographic Accelerator Unit(CPT) found in OcteonTX2 series of processors.
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c index 1b4d425bbf0e4..7fd4503d9cfc8 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -1076,6 +1076,39 @@ static void delete_engine_grps(struct pci_dev *pdev, delete_engine_group(&pdev->dev, &eng_grps->grp[i]); } +#define PCI_DEVID_CN10K_RNM 0xA098 +#define RNM_ENTROPY_STATUS 0x8 + +static void rnm_to_cpt_errata_fixup(struct device *dev) +{ + struct pci_dev *pdev; + void __iomem *base; + int timeout = 5000; + + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_RNM, NULL); + if (!pdev) + return; + + base = pci_ioremap_bar(pdev, 0); + if (!base) + goto put_pdev; + + while ((readq(base + RNM_ENTROPY_STATUS) & 0x7F) != 0x40) { + cpu_relax(); + udelay(1); + timeout--; + if (!timeout) { + dev_warn(dev, "RNM is not producing entropy\n"); + break; + } + } + + iounmap(base); + +put_pdev: + pci_dev_put(pdev); +} + int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type) { @@ -1189,9 +1222,17 @@ int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf, if (is_dev_otx2(pdev)) goto unlock; + + /* + * Ensure RNM_ENTROPY_STATUS[NORMAL_CNT] = 0x40 before writing + * CPT_AF_CTL[RNM_REQ_EN] = 1 as a workaround for HW errata. + */ + rnm_to_cpt_errata_fixup(&pdev->dev); + /* * Configure engine group mask to allow context prefetching - * for the groups. + * for the groups, and enable random number requests so that + * CPT can request random numbers from RNM.
*/ otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTL, OTX2_CPT_ALL_ENG_GRPS_MASK << 3 | BIT_ULL(16), diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c index 2748a3327e391..f8f8542ce3e47 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c @@ -1634,16 +1634,13 @@ static inline int cpt_register_algs(void) { int i, err = 0; - if (!IS_ENABLED(CONFIG_DM_CRYPT)) { - for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++) - otx2_cpt_skciphers[i].base.cra_flags &= - ~CRYPTO_ALG_DEAD; - - err = crypto_register_skciphers(otx2_cpt_skciphers, - ARRAY_SIZE(otx2_cpt_skciphers)); - if (err) - return err; - } + for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++) + otx2_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_skciphers(otx2_cpt_skciphers, + ARRAY_SIZE(otx2_cpt_skciphers)); + if (err) + return err; for (i = 0; i < ARRAY_SIZE(otx2_cpt_aeads); i++) otx2_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index d19e5ffb5104b..d6f9e2fe863d7 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -331,7 +331,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128); } - for_each_sg(req->src, src, sg_nents(src), i) { + for_each_sg(req->src, src, sg_nents(req->src), i) { src_buf = sg_virt(src); len = sg_dma_len(src); tlen += len; diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index 6d10edc40aca0..68d39c833332e 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -52,7 +52,7 @@ static const char *const dev_cfg_services[] = { static int get_service_enabled(struct adf_accel_dev *accel_dev) { char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0}; - u32 ret; + int ret; ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC, ADF_SERVICES_ENABLED, services); diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c index a6c78b9c730bc..fa4c350c1bf92 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -75,6 +75,13 @@ static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; + /* Temporarily set the number of crypto instances to zero to avoid + * registering the crypto algorithms. 
+ * This will be removed once the algorithms support the + * CRYPTO_TFM_REQ_MAY_BACKLOG flag + */ + instances = 0; + for (i = 0; i < instances; i++) { val = i; bank = i * 2;
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c index 8efbedf63bc80..3b3ea849c5e53 100644 --- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c @@ -9,15 +9,12 @@ #include "adf_pfvf_pf_proto.h" #include "adf_pfvf_utils.h" -#define ADF_4XXX_MAX_NUM_VFS 16 - #define ADF_4XXX_PF2VM_OFFSET(i) (0x40B010 + ((i) * 0x20)) #define ADF_4XXX_VM2PF_OFFSET(i) (0x40B014 + ((i) * 0x20)) /* VF2PF interrupt source registers */ -#define ADF_4XXX_VM2PF_SOU(i) (0x41A180 + ((i) * 4)) -#define ADF_4XXX_VM2PF_MSK(i) (0x41A1C0 + ((i) * 4)) -#define ADF_4XXX_VM2PF_INT_EN_MSK BIT(0) +#define ADF_4XXX_VM2PF_SOU 0x41A180 +#define ADF_4XXX_VM2PF_MSK 0x41A1C0 #define ADF_PFVF_GEN4_MSGTYPE_SHIFT 2 #define ADF_PFVF_GEN4_MSGTYPE_MASK 0x3F @@ -41,51 +38,30 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i) static u32 adf_gen4_get_vf2pf_sources(void __iomem *pmisc_addr) { - int i; u32 sou, mask; - int num_csrs = ADF_4XXX_MAX_NUM_VFS; - u32 vf_mask = 0; - for (i = 0; i < num_csrs; i++) { - sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU(i)); - mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK(i)); - sou &= ~mask; - vf_mask |= sou << i; - } + sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU); + mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK); - return vf_mask; + return sou &= ~mask; } static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { - int num_csrs = ADF_4XXX_MAX_NUM_VFS; - unsigned long mask = vf_mask; unsigned int val; - int i; - - for_each_set_bit(i, &mask, num_csrs) { - unsigned int offset = ADF_4XXX_VM2PF_MSK(i); - val = ADF_CSR_RD(pmisc_addr, offset) & ~ADF_4XXX_VM2PF_INT_EN_MSK; - ADF_CSR_WR(pmisc_addr, offset, val); - } + val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) & ~vf_mask; + ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val); } static void adf_gen4_disable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { - int num_csrs = ADF_4XXX_MAX_NUM_VFS; - unsigned long mask = vf_mask; unsigned int val; - int i; - - for_each_set_bit(i, &mask, num_csrs) { - unsigned int offset = ADF_4XXX_VM2PF_MSK(i); - val = ADF_CSR_RD(pmisc_addr, offset) | ADF_4XXX_VM2PF_INT_EN_MSK; - ADF_CSR_WR(pmisc_addr, offset, val); - } + val = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK) | vf_mask; + ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val); } static int adf_gen4_pfvf_send(struct adf_accel_dev *accel_dev,
diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c index 14b222691c9c2..1141258db4b65 100644 --- a/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pfvf_vf_msg.c @@ -96,7 +96,7 @@ int adf_vf2pf_request_version(struct adf_accel_dev *accel_dev) int adf_vf2pf_get_capabilities(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; - struct capabilities_v3 cap_msg = { { 0 }, }; + struct capabilities_v3 cap_msg = { 0 }; unsigned int len = sizeof(cap_msg); if (accel_dev->vf.pf_compat_ver < ADF_PFVF_COMPAT_CAPABILITIES) @@ -141,7 +141,7 @@ int adf_vf2pf_get_capabilities(struct adf_accel_dev *accel_dev) int adf_vf2pf_get_ring_to_svc(struct adf_accel_dev *accel_dev) { - struct ring_to_svc_map_v1 rts_map_msg = { { 0 }, }; + struct ring_to_svc_map_v1 rts_map_msg = { 0 }; unsigned int len = sizeof(rts_map_msg); if (accel_dev->vf.pf_compat_ver < ADF_PFVF_COMPAT_RING_TO_SVC_MAP)
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 7234c4940fae4..67c9588e89df9 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -161,6 +161,13 @@ int qat_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; + /* Temporarily set the number of crypto instances to zero to avoid + * registering the crypto algorithms. + * This will be removed once the algorithms support the + * CRYPTO_TFM_REQ_MAY_BACKLOG flag + */ + instances = 0; + for (i = 0; i < instances; i++) { val = i; snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i);
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 1cece1a7d3f00..5bbf0d2722e11 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -506,7 +506,6 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = { .exit = rk_ablk_exit_tfm, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES_BLOCK_SIZE, .setkey = rk_tdes_setkey, .encrypt = rk_des3_ede_ecb_encrypt, .decrypt = rk_des3_ede_ecb_decrypt,
diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig index c85fab7ef0bdd..b2c28b87f14b3 100644 --- a/drivers/crypto/vmx/Kconfig +++ b/drivers/crypto/vmx/Kconfig @@ -2,7 +2,11 @@ config CRYPTO_DEV_VMX_ENCRYPT tristate "Encryption acceleration support on P8 CPU" depends on CRYPTO_DEV_VMX + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_CTR select CRYPTO_GHASH + select CRYPTO_XTS default m help Support for VMX cryptographic acceleration instructions on Power8 CPU.
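The four select lines added to CRYPTO_DEV_VMX_ENCRYPT above matter because the P8 drivers register their accelerated ciphers with CRYPTO_ALG_NEED_FALLBACK and allocate the matching generic implementations (cbc(aes), ctr(aes), xts(aes)) at init time; if those generic algorithms are configured out, the allocation fails and the module cannot initialize. Below is a minimal, illustrative sketch of that allocation pattern, not code from the patch; example_init(), example_exit() and the fallback handle are hypothetical names.

#include <linux/err.h>
#include <linux/module.h>
#include <crypto/skcipher.h>

static struct crypto_skcipher *fallback;

static int __init example_init(void)
{
	/*
	 * Request an implementation of cbc(aes) that does not itself
	 * need a fallback; this resolves to the generic software code
	 * that CRYPTO_AES and CRYPTO_CBC provide. Without those Kconfig
	 * selects, the lookup can fail and init returns an error.
	 */
	fallback = crypto_alloc_skcipher("cbc(aes)", 0,
					 CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
	if (IS_ERR(fallback))
		return PTR_ERR(fallback);
	return 0;
}

static void __exit example_exit(void)
{
	crypto_free_skcipher(fallback);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");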
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 40ab50318dafa..a90202ac88d2f 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o ccflags-y += -I$(srctree)/drivers/cxl -cxl_core-y := bus.o +cxl_core-y := port.o cxl_core-y += pmem.o cxl_core-y += regs.o cxl_core-y += memdev.o diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/port.c similarity index 99% rename from drivers/cxl/core/bus.c rename to drivers/cxl/core/port.c index 3f9b98ecd18b7..aa5239ac67c67 100644 --- a/drivers/cxl/core/bus.c +++ b/drivers/cxl/core/port.c @@ -182,6 +182,7 @@ static void cxl_decoder_release(struct device *dev) ida_free(&port->decoder_ida, cxld->id); kfree(cxld); + put_device(&port->dev); } static const struct device_type cxl_decoder_switch_type = { @@ -500,7 +501,10 @@ struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets) if (rc < 0) goto err; + /* need parent to stick around to release the id */ + get_device(&port->dev); cxld->id = rc; + cxld->nr_targets = nr_targets; dev = &cxld->dev; device_initialize(dev); diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index e37e23bf43553..6a18ff8739e00 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -35,7 +35,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map) { int cap, cap_count; - u64 cap_array; + u32 cap_array; *map = (struct cxl_component_reg_map) { 0 }; @@ -45,11 +45,11 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, */ base += CXL_CM_OFFSET; - cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET); + cap_array = readl(base + CXL_CM_CAP_HDR_OFFSET); if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) { dev_err(dev, - "Couldn't locate the CXL.cache and CXL.mem capability array header./n"); + "Couldn't locate the CXL.cache and CXL.mem capability array header.\n"); return; } diff --git a/drivers/dax/super.c b/drivers/dax/super.c index e3029389d8097..6bd565fe2e63b 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -476,6 +476,7 @@ static int dax_fs_init(void) static void dax_fs_exit(void) { kern_unmount(dax_mnt); + rcu_barrier(); kmem_cache_destroy(dax_cache); } diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index c57a609db75be..e7330684d3b82 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -190,6 +190,10 @@ static long udmabuf_create(struct miscdevice *device, if (ubuf->pagecount > pglimit) goto err; } + + if (!ubuf->pagecount) + goto err; + ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(*ubuf->pages), GFP_KERNEL); if (!ubuf->pages) { diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c index 97c87a7cba879..43817ced3a3e1 100644 --- a/drivers/dma/hisi_dma.c +++ b/drivers/dma/hisi_dma.c @@ -30,7 +30,7 @@ #define HISI_DMA_MODE 0x217c #define HISI_DMA_OFFSET 0x100 -#define HISI_DMA_MSI_NUM 30 +#define HISI_DMA_MSI_NUM 32 #define HISI_DMA_CHAN_NUM 30 #define HISI_DMA_Q_DEPTH_VAL 1024 diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 573ad8b86804e..3061fe857d69f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -681,8 +681,13 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) group->use_rdbuf_limit = false; group->rdbufs_allowed = 0; group->rdbufs_reserved = 0; - group->tc_a = -1; - group->tc_b = -1; + if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) { + group->tc_a = 1; + group->tc_b = 1; 
+ } else { + group->tc_a = -1; + group->tc_b = -1; + } } } diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index 0ef086e43090b..7e771c56c13c6 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -266,7 +266,7 @@ static int efi_pstore_write(struct pstore_record *record) efi_name[i] = name[i]; ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES, - preemptible(), record->size, record->psi->buf); + false, record->size, record->psi->buf); if (record->reason == KMSG_DUMP_OOPS && try_module_get(THIS_MODULE)) if (!schedule_work(&efivar_work)) diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig index 931544c9f63d4..983e07dc022ed 100644 --- a/drivers/firmware/google/Kconfig +++ b/drivers/firmware/google/Kconfig @@ -21,7 +21,7 @@ config GOOGLE_SMI config GOOGLE_COREBOOT_TABLE tristate "Coreboot Table Access" - depends on ACPI || OF + depends on HAS_IOMEM && (ACPI || OF) help This option enables the coreboot_table module, which provides other firmware modules access to the coreboot table. The coreboot table diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index 7db8066b19fd5..3f67bf774821d 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -749,12 +749,6 @@ int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) }; int ret; - desc.args[0] = addr; - desc.args[1] = size; - desc.args[2] = spare; - desc.arginfo = QCOM_SCM_ARGS(3, QCOM_SCM_RW, QCOM_SCM_VAL, - QCOM_SCM_VAL); - ret = qcom_scm_call(__scm->dev, &desc, NULL); /* the pg table has been initialized already, ignore the error */ diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 29c0a616b3177..c4bf934e3553e 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -477,7 +477,7 @@ static int svc_normal_to_secure_thread(void *data) case INTEL_SIP_SMC_RSU_ERROR: pr_err("%s: STATUS_ERROR\n", __func__); cbdata->status = BIT(SVC_STATUS_ERROR); - cbdata->kaddr1 = NULL; + cbdata->kaddr1 = &res.a1; cbdata->kaddr2 = NULL; cbdata->kaddr3 = NULL; pdata->chan->scl->receive_cb(pdata->chan->scl, cbdata); diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index 303a491e520d1..757cc8b9f3de9 100644 --- a/drivers/firmware/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -113,16 +113,21 @@ __init int sysfb_create_simplefb(const struct screen_info *si, sysfb_apply_efi_quirks(pd); ret = platform_device_add_resources(pd, &res, 1); - if (ret) { - platform_device_put(pd); - return ret; - } + if (ret) + goto err_put_device; ret = platform_device_add_data(pd, mode, sizeof(*mode)); - if (ret) { - platform_device_put(pd); - return ret; - } + if (ret) + goto err_put_device; + + ret = platform_device_add(pd); + if (ret) + goto err_put_device; + + return 0; + +err_put_device: + platform_device_put(pd); - return platform_device_add(pd); + return ret; } diff --git a/drivers/fsi/fsi-master-aspeed.c b/drivers/fsi/fsi-master-aspeed.c index 8606e55c1721c..0bed2fab80558 100644 --- a/drivers/fsi/fsi-master-aspeed.c +++ b/drivers/fsi/fsi-master-aspeed.c @@ -542,25 +542,28 @@ static int fsi_master_aspeed_probe(struct platform_device *pdev) return rc; } - aspeed = devm_kzalloc(&pdev->dev, sizeof(*aspeed), GFP_KERNEL); + aspeed = kzalloc(sizeof(*aspeed), GFP_KERNEL); if (!aspeed) return -ENOMEM; aspeed->dev = &pdev->dev; aspeed->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(aspeed->base)) - return 
PTR_ERR(aspeed->base); + if (IS_ERR(aspeed->base)) { + rc = PTR_ERR(aspeed->base); + goto err_free_aspeed; + } aspeed->clk = devm_clk_get(aspeed->dev, NULL); if (IS_ERR(aspeed->clk)) { dev_err(aspeed->dev, "couldn't get clock\n"); - return PTR_ERR(aspeed->clk); + rc = PTR_ERR(aspeed->clk); + goto err_free_aspeed; } rc = clk_prepare_enable(aspeed->clk); if (rc) { dev_err(aspeed->dev, "couldn't enable clock\n"); - return rc; + goto err_free_aspeed; } rc = setup_cfam_reset(aspeed); @@ -595,7 +598,7 @@ static int fsi_master_aspeed_probe(struct platform_device *pdev) rc = opb_readl(aspeed, ctrl_base + FSI_MVER, &raw); if (rc) { dev_err(&pdev->dev, "failed to read hub version\n"); - return rc; + goto err_release; } reg = be32_to_cpu(raw); @@ -634,6 +637,8 @@ static int fsi_master_aspeed_probe(struct platform_device *pdev) err_release: clk_disable_unprepare(aspeed->clk); +err_free_aspeed: + kfree(aspeed); return rc; } diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c index da1486bb6a144..bcb756dc98663 100644 --- a/drivers/fsi/fsi-scom.c +++ b/drivers/fsi/fsi-scom.c @@ -145,7 +145,7 @@ static int put_indirect_scom_form0(struct scom_device *scom, uint64_t value, uint64_t addr, uint32_t *status) { uint64_t ind_data, ind_addr; - int rc, retries, err = 0; + int rc, err; if (value & ~XSCOM_DATA_IND_DATA) return -EINVAL; @@ -156,19 +156,14 @@ static int put_indirect_scom_form0(struct scom_device *scom, uint64_t value, if (rc || (*status & SCOM_STATUS_ANY_ERR)) return rc; - for (retries = 0; retries < SCOM_MAX_IND_RETRIES; retries++) { - rc = __get_scom(scom, &ind_data, addr, status); - if (rc || (*status & SCOM_STATUS_ANY_ERR)) - return rc; + rc = __get_scom(scom, &ind_data, addr, status); + if (rc || (*status & SCOM_STATUS_ANY_ERR)) + return rc; - err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT; - *status = err << SCOM_STATUS_PIB_RESP_SHIFT; - if ((ind_data & XSCOM_DATA_IND_COMPLETE) || (err != SCOM_PIB_BLOCKED)) - return 0; + err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT; + *status = err << SCOM_STATUS_PIB_RESP_SHIFT; - msleep(1); - } - return rc; + return 0; } static int put_indirect_scom_form1(struct scom_device *scom, uint64_t value, @@ -188,7 +183,7 @@ static int get_indirect_scom_form0(struct scom_device *scom, uint64_t *value, uint64_t addr, uint32_t *status) { uint64_t ind_data, ind_addr; - int rc, retries, err = 0; + int rc, err; ind_addr = addr & XSCOM_ADDR_DIRECT_PART; ind_data = (addr & XSCOM_ADDR_INDIRECT_PART) | XSCOM_DATA_IND_READ; @@ -196,21 +191,15 @@ static int get_indirect_scom_form0(struct scom_device *scom, uint64_t *value, if (rc || (*status & SCOM_STATUS_ANY_ERR)) return rc; - for (retries = 0; retries < SCOM_MAX_IND_RETRIES; retries++) { - rc = __get_scom(scom, &ind_data, addr, status); - if (rc || (*status & SCOM_STATUS_ANY_ERR)) - return rc; - - err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT; - *status = err << SCOM_STATUS_PIB_RESP_SHIFT; - *value = ind_data & XSCOM_DATA_IND_DATA; + rc = __get_scom(scom, &ind_data, addr, status); + if (rc || (*status & SCOM_STATUS_ANY_ERR)) + return rc; - if ((ind_data & XSCOM_DATA_IND_COMPLETE) || (err != SCOM_PIB_BLOCKED)) - return 0; + err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT; + *status = err << SCOM_STATUS_PIB_RESP_SHIFT; + *value = ind_data & XSCOM_DATA_IND_DATA; - msleep(1); - } - return rc; + return 0; } static int raw_put_scom(struct scom_device *scom, uint64_t value, @@ -289,7 +278,7 @@ static int put_scom(struct 
scom_device *scom, uint64_t value, int rc; rc = raw_put_scom(scom, value, addr, &status); - if (rc == -ENODEV) + if (rc) return rc; rc = handle_fsi2pib_status(scom, status); @@ -308,7 +297,7 @@ static int get_scom(struct scom_device *scom, uint64_t *value, int rc; rc = raw_get_scom(scom, value, addr, &status); - if (rc == -ENODEV) + if (rc) return rc; rc = handle_fsi2pib_status(scom, status); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index c16a2704ced65..f3160b951df3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -175,7 +175,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) /* Check if bpc is within clock limit. Try to degrade gracefully otherwise */ if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) { - if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) && + if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) && (mode_clock * 5/4 <= max_tmds_clock)) bpc = 10; else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ed077de426d9b..f18c698137a6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -2073,6 +2074,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { + struct drm_device *dev = adev_to_drm(adev); + struct pci_dev *parent; int i, r; amdgpu_device_enable_virtual_display(adev); @@ -2137,6 +2140,18 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) break; } + if (amdgpu_has_atpx() && + (amdgpu_is_atpx_hybrid() || + amdgpu_has_atpx_dgpu_power_cntl()) && + ((adev->flags & AMD_IS_APU) == 0) && + !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) + adev->flags |= AMD_IS_PX; + + if (!(adev->flags & AMD_IS_APU)) { + parent = pci_upstream_bridge(adev->pdev); + adev->has_pr3 = parent ? 
pci_pr3_present(parent) : false; + } + amdgpu_amdkfd_device_probe(adev); adev->pm.pp_feature = amdgpu_pp_feature_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 2a786e7886277..978c46395ced0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -91,17 +91,13 @@ static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, int amdgpu_fru_get_product_info(struct amdgpu_device *adev) { - unsigned char buff[AMDGPU_PRODUCT_NAME_LEN+2]; + unsigned char buff[AMDGPU_PRODUCT_NAME_LEN]; u32 addrptr; int size, len; - int offset = 2; if (!is_fru_eeprom_supported(adev)) return 0; - if (adev->asic_type == CHIP_ALDEBARAN) - offset = 0; - /* If algo exists, it means that the i2c_adapter's initialized */ if (!adev->pm.smu_i2c.algo) { DRM_WARN("Cannot access FRU, EEPROM accessor not initialized"); @@ -143,8 +139,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) AMDGPU_PRODUCT_NAME_LEN); len = AMDGPU_PRODUCT_NAME_LEN - 1; } - /* Start at 2 due to buff using fields 0 and 1 for the address */ - memcpy(adev->product_name, &buff[offset], len); + memcpy(adev->product_name, buff, len); adev->product_name[len] = '\0'; addrptr += size + 1; @@ -162,7 +157,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) DRM_WARN("FRU Product Number is larger than 16 characters. This is likely a mistake"); len = sizeof(adev->product_number) - 1; } - memcpy(adev->product_number, &buff[offset], len); + memcpy(adev->product_number, buff, len); adev->product_number[len] = '\0'; addrptr += size + 1; @@ -189,7 +184,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake"); len = sizeof(adev->serial) - 1; } - memcpy(adev->serial, &buff[offset], len); + memcpy(adev->serial, buff, len); adev->serial[len] = '\0'; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1ebb91db22743..11a385264bbd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -152,21 +152,10 @@ static void amdgpu_get_audio_func(struct amdgpu_device *adev) int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) { struct drm_device *dev; - struct pci_dev *parent; int r, acpi_status; dev = adev_to_drm(adev); - if (amdgpu_has_atpx() && - (amdgpu_is_atpx_hybrid() || - amdgpu_has_atpx_dgpu_power_cntl()) && - ((flags & AMD_IS_APU) == 0) && - !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) - flags |= AMD_IS_PX; - - parent = pci_upstream_bridge(adev->pdev); - adev->has_pr3 = parent ? pci_pr3_present(parent) : false; - /* amdgpu_device_init should report only fatal error * like memory allocation failure or iomapping failure, * or memory manager initialization failure, it must diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 075429bea4275..b28b5c4908601 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2613,10 +2613,13 @@ static int dm_resume(void *handle) * before the 0 streams commit. * * DC expects that link encoder assignments are *not* valid - * when committing a state, so as a workaround it needs to be - * cleared here. + * when committing a state, so as a workaround we can copy + * off of the current state. 
+ * + * We lose the previous assignments, but we had already + * committed 0 streams anyway. */ - link_enc_cfg_init(dm->dc, dc_state); + link_enc_cfg_copy(adev->dm.dc->current_state, dc_state); if (dc_enable_dmub_notifications(adev->dm.dc)) amdgpu_dm_outbox_init(adev); @@ -8144,6 +8147,9 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, mode = amdgpu_dm_create_common_mode(encoder, common_modes[i].name, common_modes[i].w, common_modes[i].h); + if (!mode) + continue; + drm_mode_probed_add(connector, mode); amdgpu_dm_connector->num_modes++; } @@ -10858,10 +10864,13 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm_crtc *crtc) { struct drm_connector *connector; - struct drm_connector_state *conn_state; + struct drm_connector_state *conn_state, *old_conn_state; struct amdgpu_dm_connector *aconnector = NULL; int i; - for_each_new_connector_in_state(state, connector, conn_state, i) { + for_each_oldnew_connector_in_state(state, connector, old_conn_state, conn_state, i) { + if (!conn_state->crtc) + conn_state = old_conn_state; + if (conn_state->crtc != crtc) continue; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index a55944da8d53f..72a3fded7142a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -122,6 +122,7 @@ static void remove_link_enc_assignment( stream->link_enc = NULL; state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id = ENGINE_ID_UNKNOWN; state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream = NULL; + dc_stream_release(stream); break; } } @@ -271,6 +272,13 @@ void link_enc_cfg_init( state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_STEADY; } +void link_enc_cfg_copy(const struct dc_state *src_ctx, struct dc_state *dst_ctx) +{ + memcpy(&dst_ctx->res_ctx.link_enc_cfg_ctx, + &src_ctx->res_ctx.link_enc_cfg_ctx, + sizeof(dst_ctx->res_ctx.link_enc_cfg_ctx)); +} + void link_enc_cfg_link_encs_assign( struct dc *dc, struct dc_state *state, diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h index a4e43b4826e0e..59ceb9ed385db 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h @@ -39,6 +39,11 @@ void link_enc_cfg_init( const struct dc *dc, struct dc_state *state); +/* + * Copies a link encoder assignment from another state. + */ +void link_enc_cfg_copy(const struct dc_state *src_ctx, struct dc_state *dst_ctx); + /* * Algorithm for assigning available DIG link encoders to streams. * diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c index 0f15bcada4e99..717977aec6d06 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c @@ -265,14 +265,6 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &pflip_irq_info_funcs\ } -#define vupdate_int_entry(reg_num)\ - [DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\ - IRQ_REG_ENTRY(OTG, reg_num,\ - OTG_GLOBAL_SYNC_STATUS, VUPDATE_INT_EN,\ - OTG_GLOBAL_SYNC_STATUS, VUPDATE_EVENT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ - } - /* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic * of DCE's DC_IRQ_SOURCE_VUPDATEx. 
*/ @@ -401,12 +393,6 @@ irq_source_info_dcn21[DAL_IRQ_SOURCES_NUMBER] = { dc_underflow_int_entry(6), [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), - vupdate_int_entry(0), - vupdate_int_entry(1), - vupdate_int_entry(2), - vupdate_int_entry(3), - vupdate_int_entry(4), - vupdate_int_entry(5), vupdate_no_lock_int_entry(0), vupdate_no_lock_int_entry(1), vupdate_no_lock_int_entry(2), diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 48cc009d9bdf3..dc910003f3cab 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2134,8 +2134,8 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } } - /* setting should not be allowed from VF */ - if (amdgpu_sriov_vf(adev)) { + /* setting should not be allowed from VF if not in one VF mode */ + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) { dev_attr->attr.mode &= ~S_IWUGO; dev_attr->store = NULL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index d93d28c1af95b..b51368fa30253 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -138,7 +138,7 @@ int smu_get_dpm_freq_range(struct smu_context *smu, uint32_t *min, uint32_t *max) { - int ret = 0; + int ret = -ENOTSUPP; if (!min && !max) return -EINVAL; diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index 592ecfcf00caf..6a882891d91c5 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -169,6 +169,7 @@ #define ADV7511_PACKET_ENABLE_SPARE2 BIT(1) #define ADV7511_PACKET_ENABLE_SPARE1 BIT(0) +#define ADV7535_REG_POWER2_HPD_OVERRIDE BIT(6) #define ADV7511_REG_POWER2_HPD_SRC_MASK 0xc0 #define ADV7511_REG_POWER2_HPD_SRC_BOTH 0x00 #define ADV7511_REG_POWER2_HPD_SRC_HPD 0x40 diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index f8e5da1485999..77118c3395bf0 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -351,11 +351,17 @@ static void __adv7511_power_on(struct adv7511 *adv7511) * from standby or are enabled. When the HPD goes low the adv7511 is * reset and the outputs are disabled which might cause the monitor to * go to standby again. To avoid this we ignore the HPD pin for the - * first few seconds after enabling the output. + * first few seconds after enabling the output. On the other hand, the + * ADV7535 requires the HPD Override bit to be enabled for proper HPD operation. 
*/ - regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, - ADV7511_REG_POWER2_HPD_SRC_MASK, - ADV7511_REG_POWER2_HPD_SRC_NONE); + if (adv7511->type == ADV7535) + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, + ADV7535_REG_POWER2_HPD_OVERRIDE, + ADV7535_REG_POWER2_HPD_OVERRIDE); + else + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, + ADV7511_REG_POWER2_HPD_SRC_MASK, + ADV7511_REG_POWER2_HPD_SRC_NONE); } static void adv7511_power_on(struct adv7511 *adv7511) @@ -375,6 +381,10 @@ static void adv7511_power_on(struct adv7511 *adv7511) static void __adv7511_power_off(struct adv7511 *adv7511) { /* TODO: setup additional power down modes */ + if (adv7511->type == ADV7535) + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, + ADV7535_REG_POWER2_HPD_OVERRIDE, 0); + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER, ADV7511_POWER_POWER_DOWN, ADV7511_POWER_POWER_DOWN); @@ -672,9 +682,14 @@ adv7511_detect(struct adv7511 *adv7511, struct drm_connector *connector) status = connector_status_disconnected; } else { /* Renable HPD sensing */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, - ADV7511_REG_POWER2_HPD_SRC_MASK, - ADV7511_REG_POWER2_HPD_SRC_BOTH); + if (adv7511->type == ADV7535) + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, + ADV7535_REG_POWER2_HPD_OVERRIDE, + ADV7535_REG_POWER2_HPD_OVERRIDE); + else + regmap_update_bits(adv7511->regmap, ADV7511_REG_POWER2, + ADV7511_REG_POWER2_HPD_SRC_MASK, + ADV7511_REG_POWER2_HPD_SRC_BOTH); } adv7511->status = status; diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 2346dbcc505f2..e596cacce9e3e 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -846,7 +846,8 @@ static int segments_edid_read(struct anx7625_data *ctx, static int sp_tx_edid_read(struct anx7625_data *ctx, u8 *pedid_blocks_buf) { - u8 offset, edid_pos; + u8 offset; + int edid_pos; int count, blocks_num; u8 pblock_buf[MAX_DPCD_BUFFER_SIZE]; u8 i, j; diff --git a/drivers/gpu/drm/bridge/cdns-dsi.c b/drivers/gpu/drm/bridge/cdns-dsi.c index d8a15c459b42c..829e1a1446567 100644 --- a/drivers/gpu/drm/bridge/cdns-dsi.c +++ b/drivers/gpu/drm/bridge/cdns-dsi.c @@ -1284,6 +1284,7 @@ static const struct of_device_id cdns_dsi_of_match[] = { { .compatible = "cdns,dsi" }, { }, }; +MODULE_DEVICE_TABLE(of, cdns_dsi_of_match); static struct platform_driver cdns_dsi_platform_driver = { .probe = cdns_dsi_drm_probe, diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index dafb1b47c15fb..00597eb54661f 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -1164,7 +1164,11 @@ static int lt9611_probe(struct i2c_client *client, lt9611_enable_hpd_interrupts(lt9611); - return lt9611_audio_init(dev, lt9611); + ret = lt9611_audio_init(dev, lt9611); + if (ret) + goto err_remove_bridge; + + return 0; err_remove_bridge: drm_bridge_remove(<9611->bridge); diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c index af07eeb47ca02..6e484d836cfe2 100644 --- a/drivers/gpu/drm/bridge/nwl-dsi.c +++ b/drivers/gpu/drm/bridge/nwl-dsi.c @@ -1204,6 +1204,7 @@ static int nwl_dsi_probe(struct platform_device *pdev) ret = nwl_dsi_select_input(dsi); if (ret < 0) { + pm_runtime_disable(dev); mipi_dsi_host_unregister(&dsi->dsi_host); return ret; } diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 843265d7f1b12..ec7745c31da07 
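
The anx7625 change above widens edid_pos from u8 to int. As a quick illustration of why that matters: EDID extension block N starts at byte offset N * 128, so an 8-bit position wraps as soon as it refers past the first extension block, and a u8 can never hold a negative error code either. The following is a minimal standalone sketch, not anx7625 code; the names and the demo program are hypothetical, and only the 128-byte EDID block size is taken from the spec:

    #include <stdio.h>

    int main(void)
    {
        unsigned char pos8; /* analogous to the old "u8 edid_pos" */
        int pos, block;

        for (block = 0; block < 4; block++) {
            pos8 = (unsigned char)(block * 128); /* wraps: 0, 128, 0, 128 */
            pos = block * 128;                   /* holds: 0, 128, 256, 384 */
            printf("block %d: u8 offset %d, int offset %d\n", block, pos8, pos);
        }
        return 0;
    }
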
100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -2120,7 +2120,7 @@ static void sii8620_init_rcp_input_dev(struct sii8620 *ctx) if (ret) { dev_err(ctx->dev, "Failed to register RC device\n"); ctx->error = ret; - rc_free_device(ctx->rc_dev); + rc_free_device(rc_dev); return; } ctx->rc_dev = rc_dev; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 54d8fdad395f5..97cdc61b57f61 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2551,8 +2551,9 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, if (!output_fmts) return NULL; - /* If dw-hdmi is the only bridge, avoid negociating with ourselves */ - if (list_is_singular(&bridge->encoder->bridge_chain)) { + /* If dw-hdmi is the first or only bridge, avoid negotiating with ourselves */ + if (list_is_singular(&bridge->encoder->bridge_chain) || + list_is_first(&bridge->chain_node, &bridge->encoder->bridge_chain)) { *num_output_fmts = 1; output_fmts[0] = MEDIA_BUS_FMT_FIXED; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index e44e18a0112af..56c3fd08c6a0b 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -1199,6 +1199,7 @@ __dw_mipi_dsi_probe(struct platform_device *pdev, ret = mipi_dsi_host_register(&dsi->dsi_host); if (ret) { dev_err(dev, "Failed to register MIPI host: %d\n", ret); + pm_runtime_disable(dev); dw_mipi_dsi_debugfs_remove(dsi); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index 945f08de45f1d..314a84ffcea3d 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c @@ -560,10 +560,14 @@ static int sn65dsi83_parse_dt(struct sn65dsi83 *ctx, enum sn65dsi83_model model) ctx->host_node = of_graph_get_remote_port_parent(endpoint); of_node_put(endpoint); - if (ctx->dsi_lanes < 0 || ctx->dsi_lanes > 4) - return -EINVAL; - if (!ctx->host_node) - return -ENODEV; + if (ctx->dsi_lanes < 0 || ctx->dsi_lanes > 4) { + ret = -EINVAL; + goto err_put_node; + } + if (!ctx->host_node) { + ret = -ENODEV; + goto err_put_node; + } ctx->lvds_dual_link = false; ctx->lvds_dual_link_even_odd_swap = false; @@ -590,16 +594,22 @@ static int sn65dsi83_parse_dt(struct sn65dsi83 *ctx, enum sn65dsi83_model model) ret = drm_of_find_panel_or_bridge(dev->of_node, 2, 0, &panel, &panel_bridge); if (ret < 0) - return ret; + goto err_put_node; if (panel) { panel_bridge = devm_drm_panel_bridge_add(dev, panel); - if (IS_ERR(panel_bridge)) - return PTR_ERR(panel_bridge); + if (IS_ERR(panel_bridge)) { + ret = PTR_ERR(panel_bridge); + goto err_put_node; + } } ctx->panel_bridge = panel_bridge; return 0; + +err_put_node: + of_node_put(ctx->host_node); + return ret; } static int sn65dsi83_host_attach(struct sn65dsi83 *ctx) @@ -673,8 +683,10 @@ static int sn65dsi83_probe(struct i2c_client *client, return ret; ctx->regmap = devm_regmap_init_i2c(client, &sn65dsi83_regmap_config); - if (IS_ERR(ctx->regmap)) - return PTR_ERR(ctx->regmap); + if (IS_ERR(ctx->regmap)) { + ret = PTR_ERR(ctx->regmap); + goto err_put_node; + } dev_set_drvdata(dev, ctx); i2c_set_clientdata(client, ctx); @@ -691,6 +703,8 @@ static int sn65dsi83_probe(struct i2c_client *client, err_remove_bridge: drm_bridge_remove(&ctx->bridge); +err_put_node: + of_node_put(ctx->host_node); return ret; }
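
The lt9611, nwl-dsi, dw-mipi-dsi and sn65dsi83 fixes above all converge on the same probe-time rule: every resource acquired before a failure point must be released on the error path, in reverse order, which kernel C conventionally expresses with goto labels. Below is a schematic sketch of that idiom; the acquire_*/release_* pairs are placeholders standing in for calls such as of_node_put(), pm_runtime_disable() or drm_bridge_remove(), not a real driver API:

    struct device;

    /* Hypothetical acquire/release pairs; not a real kernel API. */
    static int acquire_a(struct device *dev) { (void)dev; return 0; }
    static void release_a(struct device *dev) { (void)dev; }
    static int acquire_b(struct device *dev) { (void)dev; return 0; }
    static void release_b(struct device *dev) { (void)dev; }
    static int acquire_c(struct device *dev) { (void)dev; return 0; }

    static int example_probe(struct device *dev)
    {
        int ret;

        ret = acquire_a(dev);
        if (ret)
            return ret; /* nothing acquired yet, nothing to undo */

        ret = acquire_b(dev);
        if (ret)
            goto err_release_a;

        ret = acquire_c(dev);
        if (ret)
            goto err_release_b;

        return 0;

    err_release_b:
        release_b(dev);
    err_release_a:
        release_a(dev); /* e.g. the of_node_put() the sn65dsi83 fix adds */
        return ret;
    }

diff --git 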
a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 23f9073bc473a..c9528aa62c9c9 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -144,16 +144,6 @@ u8 drm_dp_get_adjust_tx_ffe_preset(const u8 link_status[DP_LINK_STATUS_SIZE], } EXPORT_SYMBOL(drm_dp_get_adjust_tx_ffe_preset); -u8 drm_dp_get_adjust_request_post_cursor(const u8 link_status[DP_LINK_STATUS_SIZE], - unsigned int lane) -{ - unsigned int offset = DP_ADJUST_REQUEST_POST_CURSOR2; - u8 value = dp_link_status(link_status, offset); - - return (value >> (lane << 1)) & 0x3; -} -EXPORT_SYMBOL(drm_dp_get_adjust_request_post_cursor); - static int __8b10b_clock_recovery_delay_us(const struct drm_dp_aux *aux, u8 rd_interval) { if (rd_interval > 4) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index f5f5de362ff2c..b8f5419e514ae 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -4848,7 +4848,8 @@ bool drm_detect_monitor_audio(struct edid *edid) if (!edid_ext) goto end; - has_audio = ((edid_ext[3] & EDID_BASIC_AUDIO) != 0); + has_audio = (edid_ext[0] == CEA_EXT && + (edid_ext[3] & EDID_BASIC_AUDIO) != 0); if (has_audio) { DRM_DEBUG_KMS("Monitor has basic audio support\n"); @@ -5075,21 +5076,21 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector, if (hdmi[6] & DRM_EDID_HDMI_DC_30) { dc_bpc = 10; - info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_30; + info->edid_hdmi_rgb444_dc_modes |= DRM_EDID_HDMI_DC_30; DRM_DEBUG("%s: HDMI sink does deep color 30.\n", connector->name); } if (hdmi[6] & DRM_EDID_HDMI_DC_36) { dc_bpc = 12; - info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_36; + info->edid_hdmi_rgb444_dc_modes |= DRM_EDID_HDMI_DC_36; DRM_DEBUG("%s: HDMI sink does deep color 36.\n", connector->name); } if (hdmi[6] & DRM_EDID_HDMI_DC_48) { dc_bpc = 16; - info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_48; + info->edid_hdmi_rgb444_dc_modes |= DRM_EDID_HDMI_DC_48; DRM_DEBUG("%s: HDMI sink does deep color 48.\n", connector->name); } @@ -5104,16 +5105,9 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector, connector->name, dc_bpc); info->bpc = dc_bpc; - /* - * Deep color support mandates RGB444 support for all video - * modes and forbids YCRCB422 support for all video modes per - * HDMI 1.3 spec. - */ - info->color_formats = DRM_COLOR_FORMAT_RGB444; - /* YCRCB444 is optional according to spec. 
*/ if (hdmi[6] & DRM_EDID_HDMI_DC_Y444) { - info->color_formats |= DRM_COLOR_FORMAT_YCRCB444; + info->edid_hdmi_ycbcr444_dc_modes = info->edid_hdmi_rgb444_dc_modes; DRM_DEBUG("%s: HDMI sink does YCRCB444 in deep color.\n", connector->name); } diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index ed43b987d306a..f15127a32f7a7 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -2346,6 +2346,7 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, fbi->fbops = &drm_fbdev_fb_ops; fbi->screen_size = sizes->surface_height * fb->pitches[0]; fbi->fix.smem_len = fbi->screen_size; + fbi->flags = FBINFO_DEFAULT; drm_fb_helper_fill_info(fbi, fb_helper, sizes); @@ -2353,19 +2354,21 @@ static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper, fbi->screen_buffer = vzalloc(fbi->screen_size); if (!fbi->screen_buffer) return -ENOMEM; + fbi->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST; fbi->fbdefio = &drm_fbdev_defio; - fb_deferred_io_init(fbi); } else { /* buffer is mapped for HW framebuffer */ ret = drm_client_buffer_vmap(fb_helper->buffer, &map); if (ret) return ret; - if (map.is_iomem) + if (map.is_iomem) { fbi->screen_base = map.vaddr_iomem; - else + } else { fbi->screen_buffer = map.vaddr; + fbi->flags |= FBINFO_VIRTFB; + } /* * Shamelessly leak the physical address to user-space. As diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index c313a5b4549c4..7e48dcd1bee4d 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -853,12 +853,57 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, &args->handle); } + +/* + * Try to flatten a dma_fence_chain into a dma_fence_array so that it can be + * added as timeline fence to a chain again. 
+ */ +static int drm_syncobj_flatten_chain(struct dma_fence **f) +{ + struct dma_fence_chain *chain = to_dma_fence_chain(*f); + struct dma_fence *tmp, **fences; + struct dma_fence_array *array; + unsigned int count; + + if (!chain) + return 0; + + count = 0; + dma_fence_chain_for_each(tmp, &chain->base) + ++count; + + fences = kmalloc_array(count, sizeof(*fences), GFP_KERNEL); + if (!fences) + return -ENOMEM; + + count = 0; + dma_fence_chain_for_each(tmp, &chain->base) + fences[count++] = dma_fence_get(tmp); + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), + 1, false); + if (!array) + goto free_fences; + + dma_fence_put(*f); + *f = &array->base; + return 0; + +free_fences: + while (count--) + dma_fence_put(fences[count]); + + kfree(fences); + return -ENOMEM; +} + static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private, struct drm_syncobj_transfer *args) { struct drm_syncobj *timeline_syncobj = NULL; - struct dma_fence *fence; struct dma_fence_chain *chain; + struct dma_fence *fence; int ret; timeline_syncobj = drm_syncobj_find(file_private, args->dst_handle); @@ -869,16 +914,22 @@ static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private, args->src_point, args->flags, &fence); if (ret) - goto err; + goto err_put_timeline; + + ret = drm_syncobj_flatten_chain(&fence); + if (ret) + goto err_free_fence; + chain = dma_fence_chain_alloc(); if (!chain) { ret = -ENOMEM; - goto err1; + goto err_free_fence; } + drm_syncobj_add_point(timeline_syncobj, chain, fence, args->dst_point); -err1: +err_free_fence: dma_fence_put(fence); -err: +err_put_timeline: drm_syncobj_put(timeline_syncobj); return ret; diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 8ac196e814d5d..d351b834a6551 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -966,7 +966,8 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) * cause. 
*/ if (!intel_can_enable_sagv(dev_priv, new_bw_state)) { - allowed_points = BIT(max_bw_point); + allowed_points &= ADLS_PSF_PT_MASK; + allowed_points |= BIT(max_bw_point); drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n", max_bw_point); } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index b5e2508db1cfe..62e763faf0aa5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4831,7 +4831,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd) struct intel_dp *intel_dp = &dig_port->dp; if (dig_port->base.type == INTEL_OUTPUT_EDP && - (long_hpd || !intel_pps_have_power(intel_dp))) { + (long_hpd || !intel_pps_have_panel_power_or_vdd(intel_dp))) { /* * vdd off can generate a long/short pulse on eDP which * would require vdd on to handle it, and thus we diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 3b5b9e7b05b7b..866ac090e3e32 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1836,6 +1836,7 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, bool has_hdmi_sink) { struct drm_i915_private *dev_priv = intel_hdmi_to_i915(hdmi); + enum phy phy = intel_port_to_phy(dev_priv, hdmi_to_dig_port(hdmi)->base.port); if (clock < 25000) return MODE_CLOCK_LOW; @@ -1856,6 +1857,14 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, if (IS_CHERRYVIEW(dev_priv) && clock > 216000 && clock < 240000) return MODE_CLOCK_RANGE; + /* ICL+ combo PHY PLL can't generate 500-533.2 MHz */ + if (intel_phy_is_combo(dev_priv, phy) && clock > 500000 && clock < 533200) + return MODE_CLOCK_RANGE; + + /* ICL+ TC PHY PLL can't generate 500-532.8 MHz */ + if (intel_phy_is_tc(dev_priv, phy) && clock > 500000 && clock < 532800) + return MODE_CLOCK_RANGE; + /* * SNPS PHYs' MPLLB table-based programming can only handle a fixed * set of link rates. @@ -1912,7 +1921,7 @@ static bool intel_hdmi_sink_bpc_possible(struct drm_connector *connector, if (ycbcr420_output) return hdmi->y420_dc_modes & DRM_EDID_YCBCR420_DC_36; else - return info->edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_36; + return info->edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_36; case 10: if (!has_hdmi_sink) return false; @@ -1920,7 +1929,7 @@ static bool intel_hdmi_sink_bpc_possible(struct drm_connector *connector, if (ycbcr420_output) return hdmi->y420_dc_modes & DRM_EDID_YCBCR420_DC_30; else - return info->edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30; + return info->edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30; case 8: return true; default: diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 4a2662838cd8d..df10b6898987a 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -375,6 +375,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, return -EINVAL; } + /* + * The port numbering and mapping here is bizarre. The now-obsolete + * swsci spec supports ports numbered [0..4]. Port E is handled as a + * special case, but port F and beyond are not. The functionality is + * supposed to be obsolete for new platforms. Just bail out if the port + * number is out of bounds after mapping. 
+ */ + if (port > 4) { + drm_dbg_kms(&dev_priv->drm, + "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n", + intel_encoder->base.base.id, intel_encoder->base.name, + port_name(intel_encoder->port), port); + return -EINVAL; + } + if (!enable) parm |= 4 << 8; diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index e9c679bb1b2eb..5edd188d97479 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -1075,14 +1075,14 @@ static void intel_pps_vdd_sanitize(struct intel_dp *intel_dp) edp_panel_vdd_schedule_off(intel_dp); } -bool intel_pps_have_power(struct intel_dp *intel_dp) +bool intel_pps_have_panel_power_or_vdd(struct intel_dp *intel_dp) { intel_wakeref_t wakeref; bool have_power = false; with_intel_pps_lock(intel_dp, wakeref) { - have_power = edp_have_panel_power(intel_dp) && - edp_have_panel_vdd(intel_dp); + have_power = edp_have_panel_power(intel_dp) || + edp_have_panel_vdd(intel_dp); } return have_power; diff --git a/drivers/gpu/drm/i915/display/intel_pps.h b/drivers/gpu/drm/i915/display/intel_pps.h index fbb47f6f453e4..e64144659d31f 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.h +++ b/drivers/gpu/drm/i915/display/intel_pps.h @@ -37,7 +37,7 @@ void intel_pps_vdd_on(struct intel_dp *intel_dp); void intel_pps_on(struct intel_dp *intel_dp); void intel_pps_off(struct intel_dp *intel_dp); void intel_pps_vdd_off_sync(struct intel_dp *intel_dp); -bool intel_pps_have_power(struct intel_dp *intel_dp); +bool intel_pps_have_panel_power_or_vdd(struct intel_dp *intel_dp); void intel_pps_wait_power_cycle(struct intel_dp *intel_dp); void intel_pps_init(struct intel_dp *intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 00279e8c27756..b00de57cc957e 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1816,6 +1816,9 @@ static void _intel_psr_post_plane_update(const struct intel_atomic_state *state, mutex_lock(&psr->lock); + if (psr->sink_not_reliable) + goto exit; + drm_WARN_ON(&dev_priv->drm, psr->enabled && !crtc_state->active_planes); /* Only enable if there is active planes */ @@ -1826,6 +1829,7 @@ static void _intel_psr_post_plane_update(const struct intel_atomic_state *state, if (crtc_state->crc_enabled && psr->enabled) psr_force_hw_tracking_exit(intel_dp); +exit: mutex_unlock(&psr->lock); } } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 1478c02a82cbe..936a257b511c8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -439,7 +439,7 @@ vm_access(struct vm_area_struct *area, unsigned long addr, return -EACCES; addr -= area->vm_start; - if (addr >= obj->base.size) + if (range_overflows_t(u64, addr, len, obj->base.size)) return -EINVAL; i915_gem_ww_ctx_init(&ww, true); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0c70ab08fc0c9..73efed2f30ca7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1146,7 +1146,7 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915) (GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until)) #define MEDIA_VER(i915) (INTEL_INFO(i915)->media.ver) -#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.arch, \ +#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.ver, \ INTEL_INFO(i915)->media.rel) #define 
IS_MEDIA_VER(i915, from, until) \ (MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until)) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fae4f7818d28b..12120474c80c7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3722,8 +3722,7 @@ skl_setup_sagv_block_time(struct drm_i915_private *dev_priv) MISSING_CASE(DISPLAY_VER(dev_priv)); } - /* Default to an unusable block time */ - dev_priv->sagv_block_time_us = -1; + dev_priv->sagv_block_time_us = 0; } /* @@ -5652,7 +5651,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, result->min_ddb_alloc = max(min_ddb_alloc, blocks) + 1; result->enable = true; - if (DISPLAY_VER(dev_priv) < 12) + if (DISPLAY_VER(dev_priv) < 12 && dev_priv->sagv_block_time_us) result->can_sagv = latency >= dev_priv->sagv_block_time_us; } @@ -5683,7 +5682,10 @@ static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); struct skl_wm_level *sagv_wm = &plane_wm->sagv.wm0; struct skl_wm_level *levels = plane_wm->wm; - unsigned int latency = dev_priv->wm.skl_latency[0] + dev_priv->sagv_block_time_us; + unsigned int latency = 0; + + if (dev_priv->sagv_block_time_us) + latency = dev_priv->sagv_block_time_us + dev_priv->wm.skl_latency[0]; skl_compute_plane_wm(crtc_state, 0, latency, wm_params, &levels[0], diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 80f1d439841a6..26aeaf0ab86ef 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -302,42 +302,42 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) if (priv->afbcd.ops) { ret = priv->afbcd.ops->init(priv); if (ret) - return ret; + goto free_drm; } /* Encoder Initialization */ ret = meson_encoder_cvbs_init(priv); if (ret) - goto free_drm; + goto exit_afbcd; if (has_components) { ret = component_bind_all(drm->dev, drm); if (ret) { dev_err(drm->dev, "Couldn't bind all components\n"); - goto free_drm; + goto exit_afbcd; } } ret = meson_encoder_hdmi_init(priv); if (ret) - goto free_drm; + goto exit_afbcd; ret = meson_plane_create(priv); if (ret) - goto free_drm; + goto exit_afbcd; ret = meson_overlay_create(priv); if (ret) - goto free_drm; + goto exit_afbcd; ret = meson_crtc_create(priv); if (ret) - goto free_drm; + goto exit_afbcd; ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm); if (ret) - goto free_drm; + goto exit_afbcd; drm_mode_config_reset(drm); @@ -355,6 +355,9 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) uninstall_irq: free_irq(priv->vsync_irq, drm); +exit_afbcd: + if (priv->afbcd.ops) + priv->afbcd.ops->exit(priv); free_drm: drm_dev_put(drm); @@ -385,10 +388,8 @@ static void meson_drv_unbind(struct device *dev) free_irq(priv->vsync_irq, drm); drm_dev_put(drm); - if (priv->afbcd.ops) { - priv->afbcd.ops->reset(priv); - meson_rdma_free(priv); - } + if (priv->afbcd.ops) + priv->afbcd.ops->exit(priv); } static const struct component_master_ops meson_drv_master_ops = { diff --git a/drivers/gpu/drm/meson/meson_osd_afbcd.c b/drivers/gpu/drm/meson/meson_osd_afbcd.c index ffc6b584dbf85..0cdbe899402f8 100644 --- a/drivers/gpu/drm/meson/meson_osd_afbcd.c +++ b/drivers/gpu/drm/meson/meson_osd_afbcd.c @@ -79,11 +79,6 @@ static bool meson_gxm_afbcd_supported_fmt(u64 modifier, uint32_t format) return meson_gxm_afbcd_pixel_fmt(modifier, format) >= 0; } -static int meson_gxm_afbcd_init(struct 
meson_drm *priv) -{ - return 0; -} - static int meson_gxm_afbcd_reset(struct meson_drm *priv) { writel_relaxed(VIU_SW_RESET_OSD1_AFBCD, @@ -93,6 +88,16 @@ static int meson_gxm_afbcd_reset(struct meson_drm *priv) return 0; } +static int meson_gxm_afbcd_init(struct meson_drm *priv) +{ + return 0; +} + +static void meson_gxm_afbcd_exit(struct meson_drm *priv) +{ + meson_gxm_afbcd_reset(priv); +} + static int meson_gxm_afbcd_enable(struct meson_drm *priv) { writel_relaxed(FIELD_PREP(OSD1_AFBCD_ID_FIFO_THRD, 0x40) | @@ -172,6 +177,7 @@ static int meson_gxm_afbcd_setup(struct meson_drm *priv) struct meson_afbcd_ops meson_afbcd_gxm_ops = { .init = meson_gxm_afbcd_init, + .exit = meson_gxm_afbcd_exit, .reset = meson_gxm_afbcd_reset, .enable = meson_gxm_afbcd_enable, .disable = meson_gxm_afbcd_disable, @@ -269,6 +275,18 @@ static bool meson_g12a_afbcd_supported_fmt(u64 modifier, uint32_t format) return meson_g12a_afbcd_pixel_fmt(modifier, format) >= 0; } +static int meson_g12a_afbcd_reset(struct meson_drm *priv) +{ + meson_rdma_reset(priv); + + meson_rdma_writel_sync(priv, VIU_SW_RESET_G12A_AFBC_ARB | + VIU_SW_RESET_G12A_OSD1_AFBCD, + VIU_SW_RESET); + meson_rdma_writel_sync(priv, 0, VIU_SW_RESET); + + return 0; +} + static int meson_g12a_afbcd_init(struct meson_drm *priv) { int ret; @@ -286,16 +304,10 @@ static int meson_g12a_afbcd_init(struct meson_drm *priv) return 0; } -static int meson_g12a_afbcd_reset(struct meson_drm *priv) +static void meson_g12a_afbcd_exit(struct meson_drm *priv) { - meson_rdma_reset(priv); - - meson_rdma_writel_sync(priv, VIU_SW_RESET_G12A_AFBC_ARB | - VIU_SW_RESET_G12A_OSD1_AFBCD, - VIU_SW_RESET); - meson_rdma_writel_sync(priv, 0, VIU_SW_RESET); - - return 0; + meson_g12a_afbcd_reset(priv); + meson_rdma_free(priv); } static int meson_g12a_afbcd_enable(struct meson_drm *priv) @@ -380,6 +392,7 @@ static int meson_g12a_afbcd_setup(struct meson_drm *priv) struct meson_afbcd_ops meson_afbcd_g12a_ops = { .init = meson_g12a_afbcd_init, + .exit = meson_g12a_afbcd_exit, .reset = meson_g12a_afbcd_reset, .enable = meson_g12a_afbcd_enable, .disable = meson_g12a_afbcd_disable, diff --git a/drivers/gpu/drm/meson/meson_osd_afbcd.h b/drivers/gpu/drm/meson/meson_osd_afbcd.h index 5e5523304f42f..e77ddeb6416f3 100644 --- a/drivers/gpu/drm/meson/meson_osd_afbcd.h +++ b/drivers/gpu/drm/meson/meson_osd_afbcd.h @@ -14,6 +14,7 @@ struct meson_afbcd_ops { int (*init)(struct meson_drm *priv); + void (*exit)(struct meson_drm *priv); int (*reset)(struct meson_drm *priv); int (*enable)(struct meson_drm *priv); int (*disable)(struct meson_drm *priv); diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index b983541a4c530..cd9ba13ad5fc8 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -529,7 +529,10 @@ static void mgag200_set_format_regs(struct mga_device *mdev, WREG_GFX(3, 0x00); WREG_GFX(4, 0x00); WREG_GFX(5, 0x40); - WREG_GFX(6, 0x05); + /* GCTL6 should be 0x05, but we configure memmapsl to 0xb8000 (text mode), + * so that it doesn't hang when running kexec/kdump on G200_SE rev42. 
+ */ + WREG_GFX(6, 0x0d); WREG_GFX(7, 0x0f); WREG_GFX(8, 0x0f); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 17cfad6424db6..616be7265da4d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -655,19 +655,23 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); const u32 *regs = a6xx_protect; - unsigned i, count = ARRAY_SIZE(a6xx_protect), count_max = 32; - - BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32); - BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48); + unsigned i, count, count_max; if (adreno_is_a650(adreno_gpu)) { regs = a650_protect; count = ARRAY_SIZE(a650_protect); count_max = 48; + BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48); } else if (adreno_is_a660_family(adreno_gpu)) { regs = a660_protect; count = ARRAY_SIZE(a660_protect); count_max = 48; + BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48); + } else { + regs = a6xx_protect; + count = ARRAY_SIZE(a6xx_protect); + count_max = 32; + BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32); } /* diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 1e648db439f9b..16ae0cccbbb1e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -168,7 +168,6 @@ enum dpu_enc_rc_states { * @vsync_event_work: worker to handle vsync event for autorefresh * @topology: topology of the display * @idle_timeout: idle timeout duration in milliseconds - * @dp: msm_dp pointer, for DP encoders */ struct dpu_encoder_virt { struct drm_encoder base; @@ -207,8 +206,6 @@ struct dpu_encoder_virt { struct msm_display_topology topology; u32 idle_timeout; - - struct msm_dp *dp; }; #define to_dpu_encoder_virt(x) container_of(x, struct dpu_encoder_virt, base) @@ -1099,7 +1096,7 @@ static void _dpu_encoder_virt_enable_helper(struct drm_encoder *drm_enc) } - if (dpu_enc->disp_info.intf_type == DRM_MODE_CONNECTOR_DisplayPort && + if (dpu_enc->disp_info.intf_type == DRM_MODE_ENCODER_TMDS && dpu_enc->cur_master->hw_mdptop && dpu_enc->cur_master->hw_mdptop->ops.intf_audio_select) dpu_enc->cur_master->hw_mdptop->ops.intf_audio_select( @@ -2128,8 +2125,6 @@ int dpu_encoder_setup(struct drm_device *dev, struct drm_encoder *enc, timer_setup(&dpu_enc->vsync_event_timer, dpu_encoder_vsync_event_handler, 0); - else if (disp_info->intf_type == DRM_MODE_ENCODER_TMDS) - dpu_enc->dp = priv->dp[disp_info->h_tile_instance[0]]; INIT_DELAYED_WORK(&dpu_enc->delayed_off_work, dpu_encoder_off_work); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index f9c83d6e427ad..24fbaf562d418 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -35,6 +35,14 @@ int dpu_rm_destroy(struct dpu_rm *rm) { int i; + for (i = 0; i < ARRAY_SIZE(rm->dspp_blks); i++) { + struct dpu_hw_dspp *hw; + + if (rm->dspp_blks[i]) { + hw = to_dpu_hw_dspp(rm->dspp_blks[i]); + dpu_hw_dspp_destroy(hw); + } + } for (i = 0; i < ARRAY_SIZE(rm->pingpong_blks); i++) { struct dpu_hw_pingpong *hw; diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index c724cb0bde9dc..8d1ea694d06cd 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1365,60 +1365,44 @@ static int dp_ctrl_enable_stream_clocks(struct dp_ctrl_private *ctrl) return ret; } -int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset) +void dp_ctrl_reset_irq_ctrl(struct dp_ctrl 
*dp_ctrl, bool enable) +{ + struct dp_ctrl_private *ctrl; + + ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); + + dp_catalog_ctrl_reset(ctrl->catalog); + + if (enable) + dp_catalog_ctrl_enable_irq(ctrl->catalog, enable); +} + +void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; struct dp_io *dp_io; struct phy *phy; - if (!dp_ctrl) { - DRM_ERROR("Invalid input data\n"); - return -EINVAL; - } - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); dp_io = &ctrl->parser->io; phy = dp_io->phy; - ctrl->dp_ctrl.orientation = flip; - - if (reset) - dp_catalog_ctrl_reset(ctrl->catalog); - - DRM_DEBUG_DP("flip=%d\n", flip); dp_catalog_ctrl_phy_reset(ctrl->catalog); phy_init(phy); - dp_catalog_ctrl_enable_irq(ctrl->catalog, true); - - return 0; } -/** - * dp_ctrl_host_deinit() - Uninitialize DP controller - * @dp_ctrl: Display Port Driver data - * - * Perform required steps to uninitialize DP controller - * and its resources. - */ -void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl) +void dp_ctrl_phy_exit(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; struct dp_io *dp_io; struct phy *phy; - if (!dp_ctrl) { - DRM_ERROR("Invalid input data\n"); - return; - } - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); dp_io = &ctrl->parser->io; phy = dp_io->phy; - dp_catalog_ctrl_enable_irq(ctrl->catalog, false); + dp_catalog_ctrl_phy_reset(ctrl->catalog); phy_exit(phy); - - DRM_DEBUG_DP("Host deinitialized successfully\n"); } static bool dp_ctrl_use_fixed_nvid(struct dp_ctrl_private *ctrl) @@ -1488,7 +1472,10 @@ static int dp_ctrl_deinitialize_mainlink(struct dp_ctrl_private *ctrl) } phy_power_off(phy); + + /* aux channel down, reinit phy */ phy_exit(phy); + phy_init(phy); return 0; } @@ -1761,6 +1748,9 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) /* end with failure */ break; /* lane == 1 already */ } + + /* stop link training before restarting it */ + dp_ctrl_clear_training_pattern(ctrl); } } @@ -1893,8 +1883,14 @@ int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl) return ret; } + DRM_DEBUG_DP("Before, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + phy_power_off(phy); + DRM_DEBUG_DP("After, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + /* aux channel down, reinit phy */ phy_exit(phy); phy_init(phy); @@ -1903,23 +1899,6 @@ int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl) return ret; } -void dp_ctrl_off_phy(struct dp_ctrl *dp_ctrl) -{ - struct dp_ctrl_private *ctrl; - struct dp_io *dp_io; - struct phy *phy; - - ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); - dp_io = &ctrl->parser->io; - phy = dp_io->phy; - - dp_catalog_ctrl_reset(ctrl->catalog); - - phy_exit(phy); - - DRM_DEBUG_DP("DP off phy done\n"); -} - int dp_ctrl_off(struct dp_ctrl *dp_ctrl) { struct dp_ctrl_private *ctrl; @@ -1947,10 +1926,14 @@ int dp_ctrl_off(struct dp_ctrl *dp_ctrl) DRM_ERROR("Failed to disable link clocks. 
ret=%d\n", ret); } + DRM_DEBUG_DP("Before, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + phy_power_off(phy); - phy_exit(phy); - DRM_DEBUG_DP("DP off done\n"); + DRM_DEBUG_DP("After, phy=%x init_count=%d power_on=%d\n", + (u32)(uintptr_t)phy, phy->init_count, phy->power_count); + return ret; } diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.h b/drivers/gpu/drm/msm/dp/dp_ctrl.h index 2363a2df9597b..2433edbc70a6d 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.h +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.h @@ -19,12 +19,9 @@ struct dp_ctrl { u32 pixel_rate; }; -int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset); -void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl); int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl); int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl); int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl); -void dp_ctrl_off_phy(struct dp_ctrl *dp_ctrl); int dp_ctrl_off(struct dp_ctrl *dp_ctrl); void dp_ctrl_push_idle(struct dp_ctrl *dp_ctrl); void dp_ctrl_isr(struct dp_ctrl *dp_ctrl); @@ -34,4 +31,9 @@ struct dp_ctrl *dp_ctrl_get(struct device *dev, struct dp_link *link, struct dp_power *power, struct dp_catalog *catalog, struct dp_parser *parser); +void dp_ctrl_reset_irq_ctrl(struct dp_ctrl *dp_ctrl, bool enable); +void dp_ctrl_phy_init(struct dp_ctrl *dp_ctrl); +void dp_ctrl_phy_exit(struct dp_ctrl *dp_ctrl); +void dp_ctrl_irq_phy_exit(struct dp_ctrl *dp_ctrl); + #endif /* _DP_CTRL_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 7cc4d21f20911..1d7f82e6eafea 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -83,6 +83,7 @@ struct dp_display_private { /* state variables */ bool core_initialized; + bool phy_initialized; bool hpd_irq_on; bool audio_supported; @@ -372,36 +373,45 @@ static int dp_display_process_hpd_high(struct dp_display_private *dp) return rc; } -static void dp_display_host_init(struct dp_display_private *dp, int reset) +static void dp_display_host_phy_init(struct dp_display_private *dp) { - bool flip = false; + DRM_DEBUG_DP("core_init=%d phy_init=%d\n", + dp->core_initialized, dp->phy_initialized); - DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized); - if (dp->core_initialized) { - DRM_DEBUG_DP("DP core already initialized\n"); - return; + if (!dp->phy_initialized) { + dp_ctrl_phy_init(dp->ctrl); + dp->phy_initialized = true; + } +} + +static void dp_display_host_phy_exit(struct dp_display_private *dp) +{ + DRM_DEBUG_DP("core_init=%d phy_init=%d\n", + dp->core_initialized, dp->phy_initialized); + + if (dp->phy_initialized) { + dp_ctrl_phy_exit(dp->ctrl); + dp->phy_initialized = false; } +} - if (dp->usbpd->orientation == ORIENTATION_CC2) - flip = true; +static void dp_display_host_init(struct dp_display_private *dp) +{ + DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized); - dp_power_init(dp->power, flip); - dp_ctrl_host_init(dp->ctrl, flip, reset); + dp_power_init(dp->power, false); + dp_ctrl_reset_irq_ctrl(dp->ctrl, true); dp_aux_init(dp->aux); dp->core_initialized = true; } static void dp_display_host_deinit(struct dp_display_private *dp) { - if (!dp->core_initialized) { - DRM_DEBUG_DP("DP core not initialized\n"); - return; - } + DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized); - dp_ctrl_host_deinit(dp->ctrl); + dp_ctrl_reset_irq_ctrl(dp->ctrl, false); dp_aux_deinit(dp->aux); dp_power_deinit(dp->power); - dp->core_initialized = false; } @@ -409,7 +419,7 @@ static int 
dp_display_usbpd_configure_cb(struct device *dev) { struct dp_display_private *dp = dev_get_dp_display_private(dev); - dp_display_host_init(dp, false); + dp_display_host_phy_init(dp); return dp_display_process_hpd_high(dp); } @@ -530,11 +540,6 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) ret = dp_display_usbpd_configure_cb(&dp->pdev->dev); if (ret) { /* link train failed */ dp->hpd_state = ST_DISCONNECTED; - - if (ret == -ECONNRESET) { /* cable unplugged */ - dp->core_initialized = false; - } - } else { /* start sentinel checking in case of missing uevent */ dp_add_event(dp, EV_CONNECT_PENDING_TIMEOUT, 0, tout); @@ -604,8 +609,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) if (state == ST_DISCONNECTED) { /* triggered by irq_hdp with sink_count = 0 */ if (dp->link->sink_count == 0) { - dp_ctrl_off_phy(dp->ctrl); - dp->core_initialized = false; + dp_display_host_phy_exit(dp); } mutex_unlock(&dp->event_mutex); return 0; @@ -667,7 +671,6 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) { u32 state; - int ret; mutex_lock(&dp->event_mutex); @@ -692,16 +695,8 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) return 0; } - /* - * dp core (ahb/aux clks) must be initialized before - * irq_hpd be handled - */ - if (dp->core_initialized) { - ret = dp_display_usbpd_attention_cb(&dp->pdev->dev); - if (ret == -ECONNRESET) { /* cable unplugged */ - dp->core_initialized = false; - } - } + dp_display_usbpd_attention_cb(&dp->pdev->dev); + DRM_DEBUG_DP("hpd_state=%d\n", state); mutex_unlock(&dp->event_mutex); @@ -892,12 +887,19 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data) dp_display->audio_enabled = false; - /* triggered by irq_hpd with sink_count = 0 */ if (dp->link->sink_count == 0) { + /* + * irq_hpd with sink_count = 0 + * hdmi unplugged out of dongle + */ dp_ctrl_off_link_stream(dp->ctrl); } else { + /* + * unplugged interrupt + * dongle unplugged out of DUT + */ dp_ctrl_off(dp->ctrl); - dp->core_initialized = false; + dp_display_host_phy_exit(dp); } dp_display->power_on = false; @@ -1027,7 +1029,7 @@ void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp) static void dp_display_config_hpd(struct dp_display_private *dp) { - dp_display_host_init(dp, true); + dp_display_host_init(dp); dp_catalog_ctrl_hpd_config(dp->catalog); /* Enable interrupt first time @@ -1306,20 +1308,23 @@ static int dp_pm_resume(struct device *dev) dp->hpd_state = ST_DISCONNECTED; /* turn on dp ctrl/phy */ - dp_display_host_init(dp, true); + dp_display_host_init(dp); dp_catalog_ctrl_hpd_config(dp->catalog); - /* - * set sink to normal operation mode -- D0 - * before dpcd read - */ - dp_link_psm_config(dp->link, &dp->panel->link_info, false); if (dp_catalog_link_is_connected(dp->catalog)) { + /* + * set sink to normal operation mode -- D0 + * before dpcd read + */ + dp_display_host_phy_init(dp); + dp_link_psm_config(dp->link, &dp->panel->link_info, false); sink_count = drm_dp_read_sink_count(dp->aux); if (sink_count < 0) sink_count = 0; + + dp_display_host_phy_exit(dp); } dp->link->sink_count = sink_count; @@ -1358,18 +1363,16 @@ static int dp_pm_suspend(struct device *dev) DRM_DEBUG_DP("Before, core_inited=%d power_on=%d\n", dp->core_initialized, dp_display->power_on); - if (dp->core_initialized == true) { - /* mainlink enabled */ - if (dp_power_clk_status(dp->power, DP_CTRL_PM)) - 
dp_ctrl_off_link_stream(dp->ctrl); - - dp_display_host_deinit(dp); - } + /* mainlink enabled */ + if (dp_power_clk_status(dp->power, DP_CTRL_PM)) + dp_ctrl_off_link_stream(dp->ctrl); - dp->hpd_state = ST_SUSPENDED; + dp_display_host_phy_exit(dp); /* host_init will be called at pm_resume */ - dp->core_initialized = false; + dp_display_host_deinit(dp); + + dp->hpd_state = ST_SUSPENDED; DRM_DEBUG_DP("After, core_inited=%d power_on=%d\n", dp->core_initialized, dp_display->power_on); @@ -1460,6 +1463,7 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, struct drm_encoder *encoder) { struct msm_drm_private *priv; + struct dp_display_private *dp_priv; int ret; if (WARN_ON(!encoder) || WARN_ON(!dp_display) || WARN_ON(!dev)) @@ -1468,6 +1472,8 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, priv = dev->dev_private; dp_display->drm_dev = dev; + dp_priv = container_of(dp_display, struct dp_display_private, dp_display); + ret = dp_display_request_irq(dp_display); if (ret) { DRM_ERROR("request_irq failed, ret=%d\n", ret); @@ -1485,6 +1491,8 @@ int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev, return ret; } + dp_priv->panel->connector = dp_display->connector; + priv->connectors[priv->num_connectors++] = dp_display->connector; dp_display->bridge = msm_dp_bridge_init(dp_display, dev, encoder); @@ -1535,7 +1543,7 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder) state = dp_display->hpd_state; if (state == ST_DISPLAY_OFF) - dp_display_host_init(dp_display, true); + dp_display_host_phy_init(dp_display); dp_display_enable(dp_display, 0); diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index d4d360d19ebad..26ef41a4c1b68 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -169,16 +169,6 @@ struct drm_connector *dp_drm_connector_init(struct msm_dp *dp_display) drm_connector_attach_encoder(connector, dp_display->encoder); - if (dp_display->panel_bridge) { - ret = drm_bridge_attach(dp_display->encoder, - dp_display->panel_bridge, NULL, - DRM_BRIDGE_ATTACH_NO_CONNECTOR); - if (ret < 0) { - DRM_ERROR("failed to attach panel bridge: %d\n", ret); - return ERR_PTR(ret); - } - } - return connector; } @@ -246,5 +236,16 @@ struct drm_bridge *msm_dp_bridge_init(struct msm_dp *dp_display, struct drm_devi return ERR_PTR(rc); } + if (dp_display->panel_bridge) { + rc = drm_bridge_attach(dp_display->encoder, + dp_display->panel_bridge, bridge, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (rc < 0) { + DRM_ERROR("failed to attach panel bridge: %d\n", rc); + drm_bridge_remove(bridge); + return ERR_PTR(rc); + } + } + return bridge; } diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 71db10c0f262d..f1418722c5492 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -212,6 +212,11 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel, if (drm_add_modes_noedid(connector, 640, 480)) drm_set_preferred_mode(connector, 640, 480); mutex_unlock(&connector->dev->mode_config.mutex); + } else { + /* always add fail-safe mode as backup mode */ + mutex_lock(&connector->dev->mode_config.mutex); + drm_add_modes_noedid(connector, 640, 480); + mutex_unlock(&connector->dev->mode_config.mutex); } if (panel->aux_cfg_update_done) { diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index d8128f50b0dd5..0b782cc18b3f4 100644 --- 
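
The dp_display rework above replaces the overloaded core_initialized bookkeeping with a dedicated phy_initialized flag, so phy_init() and phy_exit() stay balanced no matter in which order hotplug, suspend and resume events arrive. Stripped to its skeleton, and using illustrative names rather than the driver's own types, the guard looks like this:

    #include <stdbool.h>

    struct guarded_phy {
        bool initialized;
        /* struct phy *phy; would live here in the real driver */
    };

    static void guarded_phy_init(struct guarded_phy *g)
    {
        if (g->initialized)
            return;             /* already up: a second init is a no-op */
        /* phy_init(g->phy); */
        g->initialized = true;
    }

    static void guarded_phy_exit(struct guarded_phy *g)
    {
        if (!g->initialized)
            return;             /* never initialized, or already torn down */
        /* phy_exit(g->phy); */
        g->initialized = false;
    }
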
a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -562,7 +562,9 @@ static int pll_10nm_register(struct dsi_pll_10nm *pll_10nm, struct clk_hw **prov char clk_name[32], parent[32], vco_name[32]; char parent2[32], parent3[32], parent4[32]; struct clk_init_data vco_init = { - .parent_names = (const char *[]){ "xo" }, + .parent_data = &(const struct clk_parent_data) { + .fw_name = "ref", + }, .num_parents = 1, .name = vco_name, .flags = CLK_IGNORE_UNUSED, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c index 7414966f198e3..75557ac99adf1 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c @@ -802,7 +802,9 @@ static int pll_14nm_register(struct dsi_pll_14nm *pll_14nm, struct clk_hw **prov { char clk_name[32], parent[32], vco_name[32]; struct clk_init_data vco_init = { - .parent_names = (const char *[]){ "xo" }, + .parent_data = &(const struct clk_parent_data) { + .fw_name = "ref", + }, .num_parents = 1, .name = vco_name, .flags = CLK_IGNORE_UNUSED, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c index 2da673a2add69..48eab80b548e1 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c @@ -521,7 +521,9 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov { char clk_name[32], parent1[32], parent2[32], vco_name[32]; struct clk_init_data vco_init = { - .parent_names = (const char *[]){ "xo" }, + .parent_data = &(const struct clk_parent_data) { + .fw_name = "ref", .name = "xo", + }, .num_parents = 1, .name = vco_name, .flags = CLK_IGNORE_UNUSED, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c index 71ed4aa0dc67e..fc56cdcc9ad64 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c @@ -385,7 +385,9 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov { char *clk_name, *parent_name, *vco_name; struct clk_init_data vco_init = { - .parent_names = (const char *[]){ "pxo" }, + .parent_data = &(const struct clk_parent_data) { + .fw_name = "ref", + }, .num_parents = 1, .flags = CLK_IGNORE_UNUSED, .ops = &clk_ops_dsi_pll_28nm_vco, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 079613d2aaa98..6e506feb111fd 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -588,7 +588,9 @@ static int pll_7nm_register(struct dsi_pll_7nm *pll_7nm, struct clk_hw **provide char clk_name[32], parent[32], vco_name[32]; char parent2[32], parent3[32], parent4[32]; struct clk_init_data vco_init = { - .parent_names = (const char *[]){ "bi_tcxo" }, + .parent_data = &(const struct clk_parent_data) { + .fw_name = "ref", + }, .num_parents = 1, .name = vco_name, .flags = CLK_IGNORE_UNUSED, @@ -862,20 +864,26 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, /* Alter PHY configurations if data rate less than 1.5GHZ*/ less_than_1500_mhz = (clk_req->bitclk_rate <= 1500000000); - /* For C-PHY, no low power settings for lower clk rate */ - if (phy->cphy_mode) - less_than_1500_mhz = false; - if (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V4_1) { vreg_ctrl_0 = less_than_1500_mhz ? 0x53 : 0x52; - glbl_rescode_top_ctrl = less_than_1500_mhz ? 0x3d : 0x00; - glbl_rescode_bot_ctrl = less_than_1500_mhz ? 
0x39 : 0x3c; + if (phy->cphy_mode) { + glbl_rescode_top_ctrl = 0x00; + glbl_rescode_bot_ctrl = 0x3c; + } else { + glbl_rescode_top_ctrl = less_than_1500_mhz ? 0x3d : 0x00; + glbl_rescode_bot_ctrl = less_than_1500_mhz ? 0x39 : 0x3c; + } glbl_str_swi_cal_sel_ctrl = 0x00; glbl_hstx_str_ctrl_0 = 0x88; } else { vreg_ctrl_0 = less_than_1500_mhz ? 0x5B : 0x59; - glbl_str_swi_cal_sel_ctrl = less_than_1500_mhz ? 0x03 : 0x00; - glbl_hstx_str_ctrl_0 = less_than_1500_mhz ? 0x66 : 0x88; + if (phy->cphy_mode) { + glbl_str_swi_cal_sel_ctrl = 0x03; + glbl_hstx_str_ctrl_0 = 0x66; + } else { + glbl_str_swi_cal_sel_ctrl = less_than_1500_mhz ? 0x03 : 0x00; + glbl_hstx_str_ctrl_0 = less_than_1500_mhz ? 0x66 : 0x88; + } glbl_rescode_top_ctrl = 0x03; glbl_rescode_bot_ctrl = 0x3c; } diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index 9bf319be11f60..12641616acd30 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -83,6 +83,12 @@ static struct devfreq_dev_profile msm_devfreq_profile = { static void msm_devfreq_boost_work(struct kthread_work *work); static void msm_devfreq_idle_work(struct kthread_work *work); +static bool has_devfreq(struct msm_gpu *gpu) +{ + struct msm_gpu_devfreq *df = &gpu->devfreq; + return !!df->devfreq; +} + void msm_devfreq_init(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; @@ -149,6 +155,9 @@ void msm_devfreq_cleanup(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; + if (!has_devfreq(gpu)) + return; + devfreq_cooling_unregister(gpu->cooling); dev_pm_qos_remove_request(&df->boost_freq); dev_pm_qos_remove_request(&df->idle_freq); @@ -156,16 +165,24 @@ void msm_devfreq_cleanup(struct msm_gpu *gpu) void msm_devfreq_resume(struct msm_gpu *gpu) { - gpu->devfreq.busy_cycles = 0; - gpu->devfreq.time = ktime_get(); + struct msm_gpu_devfreq *df = &gpu->devfreq; - devfreq_resume_device(gpu->devfreq.devfreq); + if (!has_devfreq(gpu)) + return; + + df->busy_cycles = 0; + df->time = ktime_get(); + + devfreq_resume_device(df->devfreq); } void msm_devfreq_suspend(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; + if (!has_devfreq(gpu)) + return; + devfreq_suspend_device(df->devfreq); cancel_idle_work(df); @@ -185,6 +202,9 @@ void msm_devfreq_boost(struct msm_gpu *gpu, unsigned factor) struct msm_gpu_devfreq *df = &gpu->devfreq; uint64_t freq; + if (!has_devfreq(gpu)) + return; + freq = get_freq(gpu); freq *= factor; @@ -207,7 +227,7 @@ void msm_devfreq_active(struct msm_gpu *gpu) struct devfreq_dev_status status; unsigned int idle_time; - if (!df->devfreq) + if (!has_devfreq(gpu)) return; /* @@ -253,7 +273,7 @@ void msm_devfreq_idle(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; - if (!df->devfreq) + if (!has_devfreq(gpu)) return; msm_hrtimer_queue_work(&df->idle_work, ms_to_ktime(1), diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index ae2f2abc8f5a5..daf9f87477ba1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -101,7 +101,6 @@ nv40_backlight_init(struct nouveau_encoder *encoder, if (!(nvif_rd32(device, NV40_PMC_BACKLIGHT) & NV40_PMC_BACKLIGHT_MASK)) return -ENODEV; - props->type = BACKLIGHT_RAW; props->max_brightness = 31; *ops = &nv40_bl_ops; return 0; @@ -294,7 +293,8 @@ nv50_backlight_init(struct nouveau_backlight *bl, struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); struct nvif_object *device = 
&drm->client.device.object; - if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1))) + if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1)) || + nv_conn->base.status != connector_status_connected) return -ENODEV; if (nv_conn->type == DCB_CONNECTOR_eDP) { @@ -342,7 +342,6 @@ nv50_backlight_init(struct nouveau_backlight *bl, else *ops = &nva3_bl_ops; - props->type = BACKLIGHT_RAW; props->max_brightness = 100; return 0; @@ -410,6 +409,7 @@ nouveau_backlight_init(struct drm_connector *connector) goto fail_alloc; } + props.type = BACKLIGHT_RAW; bl->dev = backlight_device_register(backlight_name, connector->kdev, nv_encoder, ops, &props); if (IS_ERR(bl->dev)) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c index 667fa016496ee..a6ea89a5d51ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c @@ -142,11 +142,12 @@ nvkm_acr_hsfw_load_bl(struct nvkm_acr *acr, const char *name, int ver, hsfw->imem_size = desc->code_size; hsfw->imem_tag = desc->start_tag; - hsfw->imem = kmalloc(desc->code_size, GFP_KERNEL); - memcpy(hsfw->imem, data + desc->code_off, desc->code_size); - + hsfw->imem = kmemdup(data + desc->code_off, desc->code_size, GFP_KERNEL); nvkm_firmware_put(fw); - return 0; + if (!hsfw->imem) + return -ENOMEM; + else + return 0; } int diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index bbe628b306ee3..f8355de6e335d 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -360,8 +360,11 @@ int panfrost_gpu_init(struct panfrost_device *pfdev) panfrost_gpu_init_features(pfdev); - dma_set_mask_and_coherent(pfdev->dev, + err = dma_set_mask_and_coherent(pfdev->dev, DMA_BIT_MASK(FIELD_GET(0xff00, pfdev->features.mmu_features))); + if (err) + return err; + dma_set_max_seg_size(pfdev->dev, UINT_MAX); irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu"); diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 607ad5620bd99..1546abcadacf4 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -204,7 +204,7 @@ int radeon_get_monitor_bpc(struct drm_connector *connector) /* Check if bpc is within clock limit. 
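Deep color scales the TMDS clock by bpc/8, so the checks below compare mode_clock * 3/2 for 12 bpc and mode_clock * 5/4 for 10 bpc against max_tmds_clock.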
Try to degrade gracefully otherwise */ if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) { - if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) && + if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) && (mode_clock * 5/4 <= max_tmds_clock)) bpc = 10; else diff --git a/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c b/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c index 6b4759ed6bfd4..c491429f1a029 100644 --- a/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c +++ b/drivers/gpu/drm/selftests/test-drm_dp_mst_helper.c @@ -131,8 +131,10 @@ sideband_msg_req_encode_decode(struct drm_dp_sideband_msg_req_body *in) return false; txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL); - if (!txmsg) + if (!txmsg) { + kfree(out); return false; + } drm_dp_encode_sideband_req(in, txmsg); ret = drm_dp_decode_sideband_req(txmsg, out); diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c index 70dfb7d1dec55..f5535eb04c6b1 100644 --- a/drivers/gpu/drm/tegra/dp.c +++ b/drivers/gpu/drm/tegra/dp.c @@ -549,6 +549,15 @@ static void drm_dp_link_get_adjustments(struct drm_dp_link *link, { struct drm_dp_link_train_set *adjust = &link->train.adjust; unsigned int i; + u8 post_cursor; + int err; + + err = drm_dp_dpcd_read(link->aux, DP_ADJUST_REQUEST_POST_CURSOR2, + &post_cursor, sizeof(post_cursor)); + if (err < 0) { + DRM_ERROR("failed to read post_cursor2: %d\n", err); + post_cursor = 0; + } for (i = 0; i < link->lanes; i++) { adjust->voltage_swing[i] = @@ -560,7 +569,7 @@ static void drm_dp_link_get_adjustments(struct drm_dp_link *link, DP_TRAIN_PRE_EMPHASIS_SHIFT; adjust->post_cursor[i] = - drm_dp_get_adjust_request_post_cursor(status, i); + (post_cursor >> (i << 1)) & 0x3; } } diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index f46d377f0c304..de1333dc0d867 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1538,8 +1538,10 @@ static int tegra_dsi_ganged_probe(struct tegra_dsi *dsi) dsi->slave = platform_get_drvdata(gangster); of_node_put(np); - if (!dsi->slave) + if (!dsi->slave) { + put_device(&gangster->dev); return -EPROBE_DEFER; + } dsi->slave->master = dsi; } diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 04146da2d1d8e..11576e0297e41 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -798,6 +798,9 @@ static int simpledrm_device_init_modeset(struct simpledrm_device *sdev) if (ret) return ret; drm_connector_helper_add(connector, &simpledrm_connector_helper_funcs); + drm_connector_set_panel_orientation_with_quirk(connector, + DRM_MODE_PANEL_ORIENTATION_UNKNOWN, + mode->hdisplay, mode->vdisplay); formats = simpledrm_device_formats(sdev, &nformats); diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index bd46396a1ae07..1afcd54fbbd53 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -219,6 +219,7 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) int ret; u32 mmu_debug; u32 ident1; + u64 mask; v3d = devm_drm_dev_alloc(dev, &v3d_drm_driver, struct v3d_dev, drm); if (IS_ERR(v3d)) @@ -237,8 +238,11 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) return ret; mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO); - dma_set_mask_and_coherent(dev, - DMA_BIT_MASK(30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_PA_WIDTH))); + mask = DMA_BIT_MASK(30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_PA_WIDTH)); + ret = dma_set_mask_and_coherent(dev, mask); + if (ret) + return 
ret; + v3d->va_width = 30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_VA_WIDTH); ident1 = V3D_READ(V3D_HUB_IDENT1); diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index 2de61b63ef91d..48d3c9955f0dd 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -248,6 +248,9 @@ void virtio_gpu_array_put_free(struct virtio_gpu_object_array *objs) { u32 i; + if (!objs) + return; + for (i = 0; i < objs->nents; i++) drm_gem_object_put(objs->objs[i]); virtio_gpu_array_free(objs); diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 6994f8c0e02ea..80c685ab3e30d 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -447,7 +447,6 @@ static int host1x_probe(struct platform_device *pdev) if (syncpt_irq < 0) return syncpt_irq; - host1x_bo_cache_init(&host->cache); mutex_init(&host->devices_lock); INIT_LIST_HEAD(&host->devices); INIT_LIST_HEAD(&host->list); @@ -489,10 +488,12 @@ static int host1x_probe(struct platform_device *pdev) if (err) return err; + host1x_bo_cache_init(&host->cache); + err = host1x_iommu_init(host); if (err < 0) { dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err); - return err; + goto destroy_cache; } err = host1x_channel_list_init(&host->channel_list, @@ -553,6 +554,8 @@ static int host1x_probe(struct platform_device *pdev) host1x_channel_list_free(&host->channel_list); iommu_exit: host1x_iommu_exit(host); +destroy_cache: + host1x_bo_cache_destroy(&host->cache); return err; } @@ -568,6 +571,7 @@ static int host1x_remove(struct platform_device *pdev) host1x_intr_deinit(host); host1x_syncpt_deinit(host); + host1x_channel_list_free(&host->channel_list); host1x_iommu_exit(host); host1x_bo_cache_destroy(&host->cache); diff --git a/drivers/greybus/svc.c b/drivers/greybus/svc.c index ce7740ef449ba..51d0875a34800 100644 --- a/drivers/greybus/svc.c +++ b/drivers/greybus/svc.c @@ -866,8 +866,14 @@ static int gb_svc_hello(struct gb_operation *op) gb_svc_debugfs_init(svc); - return gb_svc_queue_deferred_request(op); + ret = gb_svc_queue_deferred_request(op); + if (ret) + goto err_remove_debugfs; + + return 0; +err_remove_debugfs: + gb_svc_debugfs_exit(svc); err_unregister_device: gb_svc_watchdog_destroy(svc); device_del(&svc->dev); diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 6726567d72976..8d6fc50dab65f 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -618,6 +618,17 @@ static int i2c_hid_get_raw_report(struct hid_device *hid, if (report_type == HID_OUTPUT_REPORT) return -EINVAL; + /* + * In case of unnumbered reports the response from the device will + * not have the report ID that the upper layers expect, so we need + * to stash it in the buffer ourselves and adjust the data size.
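+ * E.g., for an unnumbered report the code below zeroes buf[0] to stand in + * for the report ID, copies the device payload in at buf + 1, and bumps + * the returned count by one so it covers the stashed ID byte.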
+ */ + if (!report_number) { + buf[0] = 0; + buf++; + count--; + } + /* +2 bytes to include the size of the reply in the query buffer */ ask_count = min(count + 2, (size_t)ihid->bufsize); @@ -639,6 +650,9 @@ static int i2c_hid_get_raw_report(struct hid_device *hid, count = min(count, ret_count - 2); memcpy(buf, ihid->rawbuf + 2, count); + if (!report_number) + count++; + return count; } @@ -655,17 +669,19 @@ static int i2c_hid_output_raw_report(struct hid_device *hid, __u8 *buf, mutex_lock(&ihid->reset_lock); - if (report_id) { - buf++; - count--; - } - + /* + * Note that both numbered and unnumbered reports passed here + * are supposed to have report ID stored in the 1st byte of the + * buffer, so we strip it off unconditionally before passing payload + * to i2c_hid_set_or_send_report which takes care of encoding + * everything properly. + */ ret = i2c_hid_set_or_send_report(client, report_type == HID_FEATURE_REPORT ? 0x03 : 0x02, - report_id, buf, count, use_data); + report_id, buf + 1, count - 1, use_data); - if (report_id && ret >= 0) - ret++; /* add report_id to the number of transfered bytes */ + if (ret >= 0) + ret++; /* add report_id to the number of transferred bytes */ mutex_unlock(&ihid->reset_lock); diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c index e24988586710d..16aa030af8453 100644 --- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c +++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c @@ -661,21 +661,12 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data, */ payload_max_size &= ~(L1_CACHE_BYTES - 1); - dma_buf = kmalloc(payload_max_size, GFP_KERNEL | GFP_DMA32); + dma_buf = dma_alloc_coherent(devc, payload_max_size, &dma_buf_phy, GFP_KERNEL); if (!dma_buf) { client_data->flag_retry = true; return -ENOMEM; } - dma_buf_phy = dma_map_single(devc, dma_buf, payload_max_size, - DMA_TO_DEVICE); - if (dma_mapping_error(devc, dma_buf_phy)) { - dev_err(cl_data_to_dev(client_data), "DMA map failed\n"); - client_data->flag_retry = true; - rv = -ENOMEM; - goto end_err_dma_buf_release; - } - ldr_xfer_dma_frag.fragment.hdr.command = LOADER_CMD_XFER_FRAGMENT; ldr_xfer_dma_frag.fragment.xfer_mode = LOADER_XFER_MODE_DIRECT_DMA; ldr_xfer_dma_frag.ddr_phys_addr = (u64)dma_buf_phy; @@ -695,14 +686,7 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data, ldr_xfer_dma_frag.fragment.size = fragment_size; memcpy(dma_buf, &fw->data[fragment_offset], fragment_size); - dma_sync_single_for_device(devc, dma_buf_phy, - payload_max_size, - DMA_TO_DEVICE); - - /* - * Flush cache here because the dma_sync_single_for_device() - * does not do for x86. - */ + /* Flush cache to be sure the data is in main memory. 
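With dma_alloc_coherent() the explicit dma_map/dma_sync calls are gone; the clflush below still pushes the freshly copied fragment out of the CPU caches before the ISH DMA engine reads it.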
*/ clflush_cache_range(dma_buf, payload_max_size); dev_dbg(cl_data_to_dev(client_data), @@ -725,15 +709,8 @@ static int ish_fw_xfer_direct_dma(struct ishtp_cl_data *client_data, fragment_offset += fragment_size; } - dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE); - kfree(dma_buf); - return 0; - end_err_resp_buf_release: - /* Free ISH buffer if not done already, in error case */ - dma_unmap_single(devc, dma_buf_phy, payload_max_size, DMA_TO_DEVICE); -end_err_dma_buf_release: - kfree(dma_buf); + dma_free_coherent(devc, payload_max_size, dma_buf, dma_buf_phy); return rv; } diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index f2d05bff42453..439f99b8b5de2 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1563,7 +1563,7 @@ static void balloon_onchannelcallback(void *context) break; default: - pr_warn("Unhandled message: type: %d\n", dm_hdr->type); + pr_warn_ratelimited("Unhandled message: type: %d\n", dm_hdr->type); } } diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 8df25f1079bac..faeaf9757f65d 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -2252,16 +2252,31 @@ config SENSORS_ASUS_WMI config SENSORS_ASUS_WMI_EC tristate "ASUS WMI B550/X570" - depends on ACPI_WMI + depends on ACPI_WMI && SENSORS_ASUS_EC=n help If you say yes here you get support for the ACPI embedded controller hardware monitoring interface found in B550/X570 ASUS motherboards. This driver will provide readings of fans, voltages and temperatures through the system firmware. + This driver is deprecated in favor of the ASUS EC Sensors driver + which provides fully compatible output. + This driver can also be built as a module. If so, the module will be called asus_wmi_sensors_ec. +config SENSORS_ASUS_EC + tristate "ASUS EC Sensors" + depends on X86 + help + If you say yes here you get support for the ACPI embedded controller + hardware monitoring interface found in ASUS motherboards. The driver + currently supports B550/X570 boards, although other ASUS boards might + provide this monitoring interface as well. + + This driver can also be built as a module. If so, the module + will be called asus_ec_sensors. + endif # ACPI endif # HWMON diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 185f946d698b0..7bba3415ca3df 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_HWMON_VID) += hwmon-vid.o # APCI drivers obj-$(CONFIG_SENSORS_ACPI_POWER) += acpi_power_meter.o obj-$(CONFIG_SENSORS_ATK0110) += asus_atk0110.o +obj-$(CONFIG_SENSORS_ASUS_EC) += asus-ec-sensors.o obj-$(CONFIG_SENSORS_ASUS_WMI) += asus_wmi_sensors.o obj-$(CONFIG_SENSORS_ASUS_WMI_EC) += asus_wmi_ec_sensors.o diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c new file mode 100644 index 0000000000000..3ad8eadea68f3 --- /dev/null +++ b/drivers/hwmon/asus-ec-sensors.c @@ -0,0 +1,717 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * HWMON driver for ASUS motherboards that publish some sensor values + * via the embedded controller registers. 
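+ * Sensor registers live in numbered EC banks; reads are serialized against + * the firmware through the ACPI mutex defined below (ASMX by default).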
+ * + * Copyright (C) 2021 Eugene Shalygin + + * EC provides: + * - Chipset temperature + * - CPU temperature + * - Motherboard temperature + * - T_Sensor temperature + * - VRM temperature + * - Water In temperature + * - Water Out temperature + * - CPU Optional fan RPM + * - Chipset fan RPM + * - VRM Heat Sink fan RPM + * - Water Flow fan RPM + * - CPU current + * - CPU core voltage + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static char *mutex_path_override; + +/* Writing to this EC register switches EC bank */ +#define ASUS_EC_BANK_REGISTER 0xff +#define SENSOR_LABEL_LEN 16 + +/* + * Arbitrary set max. allowed bank number. Required for sorting banks and + * currently is overkill with just 2 banks used at max, but for the sake + * of alignment let's set it to a higher value. + */ +#define ASUS_EC_MAX_BANK 3 + +#define ACPI_LOCK_DELAY_MS 500 + +/* ACPI mutex for locking access to the EC for the firmware */ +#define ASUS_HW_ACCESS_MUTEX_ASMX "\\AMW0.ASMX" + +/* There are two variants of the vendor spelling */ +#define VENDOR_ASUS_UPPER_CASE "ASUSTeK COMPUTER INC." + +typedef union { + u32 value; + struct { + u8 index; + u8 bank; + u8 size; + u8 dummy; + } components; +} sensor_address; + +#define MAKE_SENSOR_ADDRESS(size, bank, index) { \ + .value = (size << 16) + (bank << 8) + index \ + } + +static u32 hwmon_attributes[hwmon_max] = { + [hwmon_chip] = HWMON_C_REGISTER_TZ, + [hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL, + [hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL, + [hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL, + [hwmon_fan] = HWMON_F_INPUT | HWMON_F_LABEL, +}; + +struct ec_sensor_info { + char label[SENSOR_LABEL_LEN]; + enum hwmon_sensor_types type; + sensor_address addr; +}; + +#define EC_SENSOR(sensor_label, sensor_type, size, bank, index) { \ + .label = sensor_label, .type = sensor_type, \ + .addr = MAKE_SENSOR_ADDRESS(size, bank, index), \ + } + +enum ec_sensors { + /* chipset temperature [℃] */ + ec_sensor_temp_chipset, + /* CPU temperature [℃] */ + ec_sensor_temp_cpu, + /* motherboard temperature [℃] */ + ec_sensor_temp_mb, + /* "T_Sensor" temperature sensor reading [℃] */ + ec_sensor_temp_t_sensor, + /* VRM temperature [℃] */ + ec_sensor_temp_vrm, + /* CPU Core voltage [mV] */ + ec_sensor_in_cpu_core, + /* CPU_Opt fan [RPM] */ + ec_sensor_fan_cpu_opt, + /* VRM heat sink fan [RPM] */ + ec_sensor_fan_vrm_hs, + /* Chipset fan [RPM] */ + ec_sensor_fan_chipset, + /* Water flow sensor reading [RPM] */ + ec_sensor_fan_water_flow, + /* CPU current [A] */ + ec_sensor_curr_cpu, + /* "Water_In" temperature sensor reading [℃] */ + ec_sensor_temp_water_in, + /* "Water_Out" temperature sensor reading [℃] */ + ec_sensor_temp_water_out, +}; + +#define SENSOR_TEMP_CHIPSET BIT(ec_sensor_temp_chipset) +#define SENSOR_TEMP_CPU BIT(ec_sensor_temp_cpu) +#define SENSOR_TEMP_MB BIT(ec_sensor_temp_mb) +#define SENSOR_TEMP_T_SENSOR BIT(ec_sensor_temp_t_sensor) +#define SENSOR_TEMP_VRM BIT(ec_sensor_temp_vrm) +#define SENSOR_IN_CPU_CORE BIT(ec_sensor_in_cpu_core) +#define SENSOR_FAN_CPU_OPT BIT(ec_sensor_fan_cpu_opt) +#define SENSOR_FAN_VRM_HS BIT(ec_sensor_fan_vrm_hs) +#define SENSOR_FAN_CHIPSET BIT(ec_sensor_fan_chipset) +#define SENSOR_FAN_WATER_FLOW BIT(ec_sensor_fan_water_flow) +#define SENSOR_CURR_CPU BIT(ec_sensor_curr_cpu) +#define SENSOR_TEMP_WATER_IN BIT(ec_sensor_temp_water_in) +#define SENSOR_TEMP_WATER_OUT BIT(ec_sensor_temp_water_out) + +/* All the known sensors for ASUS EC controllers */ +static 
const struct ec_sensor_info known_ec_sensors[] = { + [ec_sensor_temp_chipset] = + EC_SENSOR("Chipset", hwmon_temp, 1, 0x00, 0x3a), + [ec_sensor_temp_cpu] = EC_SENSOR("CPU", hwmon_temp, 1, 0x00, 0x3b), + [ec_sensor_temp_mb] = + EC_SENSOR("Motherboard", hwmon_temp, 1, 0x00, 0x3c), + [ec_sensor_temp_t_sensor] = + EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x3d), + [ec_sensor_temp_vrm] = EC_SENSOR("VRM", hwmon_temp, 1, 0x00, 0x3e), + [ec_sensor_in_cpu_core] = + EC_SENSOR("CPU Core", hwmon_in, 2, 0x00, 0xa2), + [ec_sensor_fan_cpu_opt] = + EC_SENSOR("CPU_Opt", hwmon_fan, 2, 0x00, 0xb0), + [ec_sensor_fan_vrm_hs] = EC_SENSOR("VRM HS", hwmon_fan, 2, 0x00, 0xb2), + [ec_sensor_fan_chipset] = + EC_SENSOR("Chipset", hwmon_fan, 2, 0x00, 0xb4), + [ec_sensor_fan_water_flow] = + EC_SENSOR("Water_Flow", hwmon_fan, 2, 0x00, 0xbc), + [ec_sensor_curr_cpu] = EC_SENSOR("CPU", hwmon_curr, 1, 0x00, 0xf4), + [ec_sensor_temp_water_in] = + EC_SENSOR("Water_In", hwmon_temp, 1, 0x01, 0x00), + [ec_sensor_temp_water_out] = + EC_SENSOR("Water_Out", hwmon_temp, 1, 0x01, 0x01), +}; + +/* Shortcuts for common combinations */ +#define SENSOR_SET_TEMP_CHIPSET_CPU_MB \ + (SENSOR_TEMP_CHIPSET | SENSOR_TEMP_CPU | SENSOR_TEMP_MB) +#define SENSOR_SET_TEMP_WATER (SENSOR_TEMP_WATER_IN | SENSOR_TEMP_WATER_OUT) + +#define DMI_EXACT_MATCH_BOARD(vendor, name, sensors) { \ + .matches = { \ + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, vendor), \ + DMI_EXACT_MATCH(DMI_BOARD_NAME, name), \ + }, \ + .driver_data = (void *)(sensors), \ +} + +static const struct dmi_system_id asus_ec_dmi_table[] __initconst = { + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "PRIME X570-PRO", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | + SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "Pro WS X570-ACE", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | + SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII DARK HERO", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | + SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII FORMULA", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG CROSSHAIR VIII HERO", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | + SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | + SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII HERO (WI-FI)", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | + SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | + SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, + "ROG CROSSHAIR VIII IMPACT", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-E GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CPU_OPT), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX B550-I GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + 
SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_VRM_HS | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-E GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + SENSOR_TEMP_T_SENSOR | + SENSOR_TEMP_VRM | SENSOR_FAN_CHIPSET | + SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-F GAMING", + SENSOR_SET_TEMP_CHIPSET_CPU_MB | + SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET), + DMI_EXACT_MATCH_BOARD(VENDOR_ASUS_UPPER_CASE, "ROG STRIX X570-I GAMING", + SENSOR_TEMP_T_SENSOR | SENSOR_FAN_VRM_HS | + SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE), + {} +}; + +struct ec_sensor { + unsigned int info_index; + s32 cached_value; +}; + +struct ec_sensors_data { + unsigned long board_sensors; + struct ec_sensor *sensors; + /* EC registers to read from */ + u16 *registers; + u8 *read_buffer; + /* sorted list of unique register banks */ + u8 banks[ASUS_EC_MAX_BANK + 1]; + /* in jiffies */ + unsigned long last_updated; + acpi_handle aml_mutex; + /* number of board EC sensors */ + u8 nr_sensors; + /* + * number of EC registers to read + * (sensor might span more than 1 register) + */ + u8 nr_registers; + /* number of unique register banks */ + u8 nr_banks; +}; + +static u8 register_bank(u16 reg) +{ + return reg >> 8; +} + +static u8 register_index(u16 reg) +{ + return reg & 0x00ff; +} + +static bool is_sensor_data_signed(const struct ec_sensor_info *si) +{ + /* + * guessed from WMI functions in DSDT code for boards + * of the X470 generation + */ + return si->type == hwmon_temp; +} + +static const struct ec_sensor_info * +get_sensor_info(const struct ec_sensors_data *state, int index) +{ + return &known_ec_sensors[state->sensors[index].info_index]; +} + +static int find_ec_sensor_index(const struct ec_sensors_data *ec, + enum hwmon_sensor_types type, int channel) +{ + unsigned int i; + + for (i = 0; i < ec->nr_sensors; i++) { + if (get_sensor_info(ec, i)->type == type) { + if (channel == 0) + return i; + channel--; + } + } + return -ENOENT; +} + +static int __init bank_compare(const void *a, const void *b) +{ + return *((const s8 *)a) - *((const s8 *)b); +} + +static int __init board_sensors_count(unsigned long sensors) +{ + return hweight_long(sensors); +} + +static void __init setup_sensor_data(struct ec_sensors_data *ec) +{ + struct ec_sensor *s = ec->sensors; + bool bank_found; + int i, j; + u8 bank; + + ec->nr_banks = 0; + ec->nr_registers = 0; + + for_each_set_bit(i, &ec->board_sensors, + BITS_PER_TYPE(ec->board_sensors)) { + s->info_index = i; + s->cached_value = 0; + ec->nr_registers += + known_ec_sensors[s->info_index].addr.components.size; + bank_found = false; + bank = known_ec_sensors[s->info_index].addr.components.bank; + for (j = 0; j < ec->nr_banks; j++) { + if (ec->banks[j] == bank) { + bank_found = true; + break; + } + } + if (!bank_found) { + ec->banks[ec->nr_banks++] = bank; + } + s++; + } + sort(ec->banks, ec->nr_banks, 1, bank_compare, NULL); +} + +static void __init fill_ec_registers(struct ec_sensors_data *ec) +{ + const struct ec_sensor_info *si; + unsigned int i, j, register_idx = 0; + + for (i = 0; i < ec->nr_sensors; ++i) { + si = get_sensor_info(ec, i); + for (j = 0; j < si->addr.components.size; ++j, ++register_idx) { + ec->registers[register_idx] = + (si->addr.components.bank << 8) + + si->addr.components.index + j; + } + } +} + +static acpi_handle __init asus_hw_access_mutex(struct device *dev) +{ + const char *mutex_path; + acpi_handle res; + int status; + + 
mutex_path = mutex_path_override ? + mutex_path_override : ASUS_HW_ACCESS_MUTEX_ASMX; + + status = acpi_get_handle(NULL, (acpi_string)mutex_path, &res); + if (ACPI_FAILURE(status)) { + dev_err(dev, + "Could not get hardware access guard mutex '%s': error %d", + mutex_path, status); + return NULL; + } + return res; +} + +static int asus_ec_bank_switch(u8 bank, u8 *old) +{ + int status = 0; + + if (old) { + status = ec_read(ASUS_EC_BANK_REGISTER, old); + } + if (status || (old && (*old == bank))) + return status; + return ec_write(ASUS_EC_BANK_REGISTER, bank); +} + +static int asus_ec_block_read(const struct device *dev, + struct ec_sensors_data *ec) +{ + int ireg, ibank, status; + u8 bank, reg_bank, prev_bank; + + bank = 0; + status = asus_ec_bank_switch(bank, &prev_bank); + if (status) { + dev_warn(dev, "EC bank switch failed"); + return status; + } + + if (prev_bank) { + /* oops... somebody else is working with the EC too */ + dev_warn(dev, + "Concurrent access to the ACPI EC detected.\nRace condition possible."); + } + + /* read registers minimizing bank switches. */ + for (ibank = 0; ibank < ec->nr_banks; ibank++) { + if (bank != ec->banks[ibank]) { + bank = ec->banks[ibank]; + if (asus_ec_bank_switch(bank, NULL)) { + dev_warn(dev, "EC bank switch to %d failed", + bank); + break; + } + } + for (ireg = 0; ireg < ec->nr_registers; ireg++) { + reg_bank = register_bank(ec->registers[ireg]); + if (reg_bank < bank) { + continue; + } + ec_read(register_index(ec->registers[ireg]), + ec->read_buffer + ireg); + } + } + + status = asus_ec_bank_switch(prev_bank, NULL); + return status; +} + +static inline s32 get_sensor_value(const struct ec_sensor_info *si, u8 *data) +{ + if (is_sensor_data_signed(si)) { + switch (si->addr.components.size) { + case 1: + return (s8)*data; + case 2: + return (s16)get_unaligned_be16(data); + case 4: + return (s32)get_unaligned_be32(data); + default: + return 0; + } + } else { + switch (si->addr.components.size) { + case 1: + return *data; + case 2: + return get_unaligned_be16(data); + case 4: + return get_unaligned_be32(data); + default: + return 0; + } + } +} + +static void update_sensor_values(struct ec_sensors_data *ec, u8 *data) +{ + const struct ec_sensor_info *si; + struct ec_sensor *s; + + for (s = ec->sensors; s != ec->sensors + ec->nr_sensors; s++) { + si = &known_ec_sensors[s->info_index]; + s->cached_value = get_sensor_value(si, data); + data += si->addr.components.size; + } +} + +static int update_ec_sensors(const struct device *dev, + struct ec_sensors_data *ec) +{ + int status; + + /* + * ASUS DSDT does not specify that access to the EC has to be guarded, + * but firmware does access it via ACPI + */ + if (ACPI_FAILURE(acpi_acquire_mutex(ec->aml_mutex, NULL, + ACPI_LOCK_DELAY_MS))) { + dev_err(dev, "Failed to acquire AML mutex"); + status = -EBUSY; + goto cleanup; + } + + status = asus_ec_block_read(dev, ec); + + if (!status) { + update_sensor_values(ec, ec->read_buffer); + } + if (ACPI_FAILURE(acpi_release_mutex(ec->aml_mutex, NULL))) { + dev_err(dev, "Failed to release AML mutex"); + } +cleanup: + return status; +} + +static long scale_sensor_value(s32 value, int data_type) +{ + switch (data_type) { + case hwmon_curr: + case hwmon_temp: + return value * MILLI; + default: + return value; + } +} + +static int get_cached_value_or_update(const struct device *dev, + int sensor_index, + struct ec_sensors_data *state, s32 *value) +{ + if (time_after(jiffies, state->last_updated + HZ)) { + if (update_ec_sensors(dev, state)) { + dev_err(dev, 
"update_ec_sensors() failure\n"); + return -EIO; + } + + state->last_updated = jiffies; + } + + *value = state->sensors[sensor_index].cached_value; + return 0; +} + +/* + * Now follow the functions that implement the hwmon interface + */ + +static int asus_ec_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + int ret; + s32 value = 0; + + struct ec_sensors_data *state = dev_get_drvdata(dev); + int sidx = find_ec_sensor_index(state, type, channel); + + if (sidx < 0) { + return sidx; + } + + ret = get_cached_value_or_update(dev, sidx, state, &value); + if (!ret) { + *val = scale_sensor_value(value, + get_sensor_info(state, sidx)->type); + } + + return ret; +} + +static int asus_ec_hwmon_read_string(struct device *dev, + enum hwmon_sensor_types type, u32 attr, + int channel, const char **str) +{ + struct ec_sensors_data *state = dev_get_drvdata(dev); + int sensor_index = find_ec_sensor_index(state, type, channel); + *str = get_sensor_info(state, sensor_index)->label; + + return 0; +} + +static umode_t asus_ec_hwmon_is_visible(const void *drvdata, + enum hwmon_sensor_types type, u32 attr, + int channel) +{ + const struct ec_sensors_data *state = drvdata; + + return find_ec_sensor_index(state, type, channel) >= 0 ? S_IRUGO : 0; +} + +static int __init +asus_ec_hwmon_add_chan_info(struct hwmon_channel_info *asus_ec_hwmon_chan, + struct device *dev, int num, + enum hwmon_sensor_types type, u32 config) +{ + int i; + u32 *cfg = devm_kcalloc(dev, num + 1, sizeof(*cfg), GFP_KERNEL); + + if (!cfg) + return -ENOMEM; + + asus_ec_hwmon_chan->type = type; + asus_ec_hwmon_chan->config = cfg; + for (i = 0; i < num; i++, cfg++) + *cfg = config; + + return 0; +} + +static const struct hwmon_ops asus_ec_hwmon_ops = { + .is_visible = asus_ec_hwmon_is_visible, + .read = asus_ec_hwmon_read, + .read_string = asus_ec_hwmon_read_string, +}; + +static struct hwmon_chip_info asus_ec_chip_info = { + .ops = &asus_ec_hwmon_ops, +}; + +static unsigned long __init get_board_sensors(void) +{ + const struct dmi_system_id *dmi_entry = + dmi_first_match(asus_ec_dmi_table); + + return dmi_entry ? 
(unsigned long)dmi_entry->driver_data : 0; +} + +static int __init asus_ec_probe(struct platform_device *pdev) +{ + const struct hwmon_channel_info **ptr_asus_ec_ci; + int nr_count[hwmon_max] = { 0 }, nr_types = 0; + struct hwmon_channel_info *asus_ec_hwmon_chan; + const struct hwmon_chip_info *chip_info; + struct device *dev = &pdev->dev; + struct ec_sensors_data *ec_data; + const struct ec_sensor_info *si; + enum hwmon_sensor_types type; + unsigned long board_sensors; + struct device *hwdev; + unsigned int i; + + board_sensors = get_board_sensors(); + if (!board_sensors) + return -ENODEV; + + ec_data = devm_kzalloc(dev, sizeof(struct ec_sensors_data), + GFP_KERNEL); + if (!ec_data) + return -ENOMEM; + + dev_set_drvdata(dev, ec_data); + ec_data->board_sensors = board_sensors; + ec_data->nr_sensors = board_sensors_count(ec_data->board_sensors); + ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors, + sizeof(struct ec_sensor), GFP_KERNEL); + + setup_sensor_data(ec_data); + ec_data->registers = devm_kcalloc(dev, ec_data->nr_registers, + sizeof(u16), GFP_KERNEL); + ec_data->read_buffer = devm_kcalloc(dev, ec_data->nr_registers, + sizeof(u8), GFP_KERNEL); + + if (!ec_data->registers || !ec_data->read_buffer) + return -ENOMEM; + + fill_ec_registers(ec_data); + + ec_data->aml_mutex = asus_hw_access_mutex(dev); + + for (i = 0; i < ec_data->nr_sensors; ++i) { + si = get_sensor_info(ec_data, i); + if (!nr_count[si->type]) + ++nr_types; + ++nr_count[si->type]; + } + + if (nr_count[hwmon_temp]) + nr_count[hwmon_chip]++, nr_types++; + + asus_ec_hwmon_chan = devm_kcalloc( + dev, nr_types, sizeof(*asus_ec_hwmon_chan), GFP_KERNEL); + if (!asus_ec_hwmon_chan) + return -ENOMEM; + + ptr_asus_ec_ci = devm_kcalloc(dev, nr_types + 1, + sizeof(*ptr_asus_ec_ci), GFP_KERNEL); + if (!ptr_asus_ec_ci) + return -ENOMEM; + + asus_ec_chip_info.info = ptr_asus_ec_ci; + chip_info = &asus_ec_chip_info; + + for (type = 0; type < hwmon_max; ++type) { + if (!nr_count[type]) + continue; + + asus_ec_hwmon_add_chan_info(asus_ec_hwmon_chan, dev, + nr_count[type], type, + hwmon_attributes[type]); + *ptr_asus_ec_ci++ = asus_ec_hwmon_chan++; + } + + dev_info(dev, "board has %d EC sensors that span %d registers", + ec_data->nr_sensors, ec_data->nr_registers); + + hwdev = devm_hwmon_device_register_with_info(dev, "asusec", + ec_data, chip_info, NULL); + + return PTR_ERR_OR_ZERO(hwdev); +} + + +static const struct acpi_device_id acpi_ec_ids[] = { + /* Embedded Controller Device */ + { "PNP0C09", 0 }, + {} +}; + +static struct platform_driver asus_ec_sensors_platform_driver = { + .driver = { + .name = "asus-ec-sensors", + .acpi_match_table = acpi_ec_ids, + }, +}; + +MODULE_DEVICE_TABLE(dmi, asus_ec_dmi_table); +module_platform_driver_probe(asus_ec_sensors_platform_driver, asus_ec_probe); + +module_param_named(mutex_path, mutex_path_override, charp, 0); +MODULE_PARM_DESC(mutex_path, + "Override ACPI mutex path used to guard access to hardware"); + +MODULE_AUTHOR("Eugene Shalygin "); +MODULE_DESCRIPTION( + "HWMON driver for sensors accessible via ACPI EC in ASUS motherboards"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/asus_wmi_sensors.c b/drivers/hwmon/asus_wmi_sensors.c index c80eee874b6c0..9e935e34c9983 100644 --- a/drivers/hwmon/asus_wmi_sensors.c +++ b/drivers/hwmon/asus_wmi_sensors.c @@ -71,12 +71,13 @@ static const struct dmi_system_id asus_wmi_dmi_table[] = { DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X399-A"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X470-PRO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI 
EXTREME"), - DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("CROSSHAIR VI HERO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO (WI-FI AC)"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO (WI-FI)"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-E GAMING"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-F GAMING"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-F GAMING II"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B450-I GAMING"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X399-E GAMING"), DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X470-F GAMING"), diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 098d12b9ecdad..2b91f7e05126e 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -308,6 +308,7 @@ static void superio_exit(struct nct6775_sio_data *sio_data) #define NUM_TEMP 10 /* Max number of temp attribute sets w/ limits*/ #define NUM_TEMP_FIXED 6 /* Max number of fixed temp attribute sets */ +#define NUM_TSI_TEMP 8 /* Max number of TSI temp register pairs */ #define NUM_REG_ALARM 7 /* Max number of alarm registers */ #define NUM_REG_BEEP 5 /* Max number of beep registers */ @@ -498,6 +499,8 @@ static const u16 NCT6775_REG_TEMP_CRIT[32] = { [11] = 0xa07 }; +static const u16 NCT6775_REG_TSI_TEMP[] = { 0x669 }; + /* NCT6776 specific data */ /* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */ @@ -581,6 +584,9 @@ static const u16 NCT6776_REG_TEMP_CRIT[32] = { [12] = 0x70a, }; +static const u16 NCT6776_REG_TSI_TEMP[] = { + 0x409, 0x40b, 0x40d, 0x40f, 0x411, 0x413, 0x415, 0x417 }; + /* NCT6779 specific data */ static const u16 NCT6779_REG_IN[] = { @@ -864,6 +870,8 @@ static const char *const nct6796_temp_label[] = { #define NCT6796_TEMP_MASK 0xbfff0ffe #define NCT6796_VIRT_TEMP_MASK 0x80000c00 +static const u16 NCT6796_REG_TSI_TEMP[] = { 0x409, 0x40b }; + static const char *const nct6798_temp_label[] = { "", "SYSTIN", @@ -1005,6 +1013,8 @@ static const u16 NCT6106_REG_TEMP_CRIT[32] = { [12] = 0x205, }; +static const u16 NCT6106_REG_TSI_TEMP[] = { 0x59, 0x5b, 0x5d, 0x5f, 0x61, 0x63, 0x65, 0x67 }; + /* NCT6112D/NCT6114D/NCT6116D specific data */ static const u16 NCT6116_REG_FAN[] = { 0x20, 0x22, 0x24, 0x26, 0x28 }; @@ -1069,6 +1079,8 @@ static const s8 NCT6116_BEEP_BITS[] = { 34, -1 /* intrusion0, intrusion1 */ }; +static const u16 NCT6116_REG_TSI_TEMP[] = { 0x59, 0x5b }; + static enum pwm_enable reg_to_pwm_enable(int pwm, int mode) { if (mode == 0 && pwm == 255) @@ -1169,6 +1181,12 @@ static inline u8 in_to_reg(u32 val, u8 nr) return clamp_val(DIV_ROUND_CLOSEST(val * 100, scale_in[nr]), 0, 255); } +/* TSI temperatures are in 8.3 format */ +static inline unsigned int tsi_temp_from_reg(unsigned int reg) +{ + return (reg >> 5) * 125; +} + /* * Data structures and manipulation thereof */ @@ -1179,7 +1197,7 @@ struct nct6775_data { enum kinds kind; const char *name; - const struct attribute_group *groups[6]; + const struct attribute_group *groups[7]; u16 reg_temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst, * 3=temp_crit, 4=temp_lcrit @@ -1240,6 +1258,8 @@ struct nct6775_data { const u16 *REG_ALARM; const u16 *REG_BEEP; + const u16 *REG_TSI_TEMP; + unsigned int (*fan_from_reg)(u16 reg, unsigned int divreg); unsigned int (*fan_from_reg_min)(u16 reg, unsigned int divreg); @@ -1267,6 +1287,7 @@ struct nct6775_data { s8 temp_offset[NUM_TEMP_FIXED]; s16 temp[5][NUM_TEMP]; /* 0=temp, 1=temp_over, 2=temp_hyst, 
* 3=temp_crit, 4=temp_lcrit */ + s16 tsi_temp[NUM_TSI_TEMP]; u64 alarms; u64 beeps; @@ -1315,6 +1336,7 @@ struct nct6775_data { u16 have_temp; u16 have_temp_fixed; + u16 have_tsi_temp; u16 have_in; /* Remember extra register values over suspend/resume */ @@ -1464,13 +1486,15 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) switch (data->kind) { case nct6106: return reg == 0x20 || reg == 0x22 || reg == 0x24 || + (reg >= 0x59 && reg < 0x69 && (reg & 1)) || reg == 0xe0 || reg == 0xe2 || reg == 0xe4 || reg == 0x111 || reg == 0x121 || reg == 0x131; case nct6116: return reg == 0x20 || reg == 0x22 || reg == 0x24 || - reg == 0x26 || reg == 0x28 || reg == 0xe0 || reg == 0xe2 || - reg == 0xe4 || reg == 0xe6 || reg == 0xe8 || reg == 0x111 || - reg == 0x121 || reg == 0x131 || reg == 0x191 || reg == 0x1a1; + reg == 0x26 || reg == 0x28 || reg == 0x59 || reg == 0x5b || + reg == 0xe0 || reg == 0xe2 || reg == 0xe4 || reg == 0xe6 || + reg == 0xe8 || reg == 0x111 || reg == 0x121 || reg == 0x131 || + reg == 0x191 || reg == 0x1a1; case nct6775: return (((reg & 0xff00) == 0x100 || (reg & 0xff00) == 0x200) && @@ -1479,7 +1503,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) (reg & 0x00ff) == 0x55)) || (reg & 0xfff0) == 0x630 || reg == 0x640 || reg == 0x642 || - reg == 0x662 || + reg == 0x662 || reg == 0x669 || ((reg & 0xfff0) == 0x650 && (reg & 0x000f) >= 0x06) || reg == 0x73 || reg == 0x75 || reg == 0x77; case nct6776: @@ -1490,6 +1514,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) (reg & 0x00ff) == 0x55)) || (reg & 0xfff0) == 0x630 || reg == 0x402 || + (reg >= 0x409 && reg < 0x419 && (reg & 1)) || reg == 0x640 || reg == 0x642 || ((reg & 0xfff0) == 0x650 && (reg & 0x000f) >= 0x06) || reg == 0x73 || reg == 0x75 || reg == 0x77; @@ -1504,6 +1529,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) return reg == 0x150 || reg == 0x153 || reg == 0x155 || (reg & 0xfff0) == 0x4c0 || reg == 0x402 || + (reg >= 0x409 && reg < 0x419 && (reg & 1)) || reg == 0x63a || reg == 0x63c || reg == 0x63e || reg == 0x640 || reg == 0x642 || reg == 0x64a || reg == 0x64c || @@ -1987,6 +2013,12 @@ static struct nct6775_data *nct6775_update_device(struct device *dev) data->REG_TEMP_OFFSET[i]); } + for (i = 0; i < NUM_TSI_TEMP; i++) { + if (!(data->have_tsi_temp & BIT(i))) + continue; + data->tsi_temp[i] = data->read_value(data, data->REG_TSI_TEMP[i]); + } + data->alarms = 0; for (i = 0; i < NUM_REG_ALARM; i++) { u8 alarm; @@ -2670,6 +2702,44 @@ static const struct sensor_template_group nct6775_temp_template_group = { .base = 1, }; +static ssize_t show_tsi_temp(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct nct6775_data *data = nct6775_update_device(dev); + struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr); + + return sysfs_emit(buf, "%u\n", tsi_temp_from_reg(data->tsi_temp[sattr->index])); +} + +static ssize_t show_tsi_temp_label(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr); + + return sysfs_emit(buf, "TSI%d_TEMP\n", sattr->index); +} + +SENSOR_TEMPLATE(tsi_temp_input, "temp%d_input", 0444, show_tsi_temp, NULL, 0); +SENSOR_TEMPLATE(tsi_temp_label, "temp%d_label", 0444, show_tsi_temp_label, NULL, 0); + +static umode_t nct6775_tsi_temp_is_visible(struct kobject *kobj, struct attribute *attr, + int index) +{ + struct device *dev = kobj_to_dev(kobj); + struct nct6775_data *data = dev_get_drvdata(dev); + int temp = index / 2; + + return 
(data->have_tsi_temp & BIT(temp)) ? attr->mode : 0; +} + +/* + * The index calculation in nct6775_tsi_temp_is_visible() must be kept in + * sync with the size of this array. + */ +static struct sensor_device_template *nct6775_tsi_temp_template[] = { + &sensor_dev_template_tsi_temp_input, + &sensor_dev_template_tsi_temp_label, + NULL +}; + static ssize_t show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf) { @@ -3948,10 +4018,11 @@ static int nct6775_probe(struct platform_device *pdev) const u16 *reg_temp, *reg_temp_over, *reg_temp_hyst, *reg_temp_config; const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit; const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL; - int num_reg_temp, num_reg_temp_mon; + int num_reg_temp, num_reg_temp_mon, num_reg_tsi_temp; u8 cr2a; struct attribute_group *group; struct device *hwmon_dev; + struct sensor_template_group tsi_temp_tg; int num_attr_groups = 0; if (sio_data->access == access_direct) { @@ -4043,11 +4114,13 @@ static int nct6775_probe(struct platform_device *pdev) data->ALARM_BITS = NCT6106_ALARM_BITS; data->REG_BEEP = NCT6106_REG_BEEP; data->BEEP_BITS = NCT6106_BEEP_BITS; + data->REG_TSI_TEMP = NCT6106_REG_TSI_TEMP; reg_temp = NCT6106_REG_TEMP; reg_temp_mon = NCT6106_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6106_REG_TSI_TEMP); reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; @@ -4116,11 +4189,13 @@ static int nct6775_probe(struct platform_device *pdev) data->ALARM_BITS = NCT6116_ALARM_BITS; data->REG_BEEP = NCT6106_REG_BEEP; data->BEEP_BITS = NCT6116_BEEP_BITS; + data->REG_TSI_TEMP = NCT6116_REG_TSI_TEMP; reg_temp = NCT6106_REG_TEMP; reg_temp_mon = NCT6106_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6106_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6106_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6116_REG_TSI_TEMP); reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; @@ -4191,11 +4266,13 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE; data->REG_ALARM = NCT6775_REG_ALARM; data->REG_BEEP = NCT6775_REG_BEEP; + data->REG_TSI_TEMP = NCT6775_REG_TSI_TEMP; reg_temp = NCT6775_REG_TEMP; reg_temp_mon = NCT6775_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6775_REG_TSI_TEMP); reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6775_REG_TEMP_CONFIG; @@ -4264,11 +4341,13 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE; data->REG_ALARM = NCT6775_REG_ALARM; data->REG_BEEP = NCT6776_REG_BEEP; + data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP; reg_temp = NCT6775_REG_TEMP; reg_temp_mon = NCT6775_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6775_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6775_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP); reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6776_REG_TEMP_CONFIG; @@ -4341,11 +4420,13 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_WEIGHT_TEMP[2] = NCT6775_REG_WEIGHT_TEMP_BASE; data->REG_ALARM = NCT6779_REG_ALARM; data->REG_BEEP = NCT6776_REG_BEEP; + data->REG_TSI_TEMP = 
NCT6776_REG_TSI_TEMP; reg_temp = NCT6779_REG_TEMP; reg_temp_mon = NCT6779_REG_TEMP_MON; num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP); num_reg_temp_mon = ARRAY_SIZE(NCT6779_REG_TEMP_MON); + num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP); reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; @@ -4460,6 +4541,24 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_BEEP = NCT6776_REG_BEEP; else data->REG_BEEP = NCT6792_REG_BEEP; + switch (data->kind) { + case nct6791: + case nct6792: + case nct6793: + data->REG_TSI_TEMP = NCT6776_REG_TSI_TEMP; + num_reg_tsi_temp = ARRAY_SIZE(NCT6776_REG_TSI_TEMP); + break; + case nct6795: + case nct6796: + case nct6797: + case nct6798: + data->REG_TSI_TEMP = NCT6796_REG_TSI_TEMP; + num_reg_tsi_temp = ARRAY_SIZE(NCT6796_REG_TSI_TEMP); + break; + default: + num_reg_tsi_temp = 0; + break; + } reg_temp = NCT6779_REG_TEMP; num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP); @@ -4659,6 +4758,12 @@ static int nct6775_probe(struct platform_device *pdev) } #endif /* USE_ALTERNATE */ + /* Check which TSIx_TEMP registers are active */ + for (i = 0; i < num_reg_tsi_temp; i++) { + if (data->read_value(data, data->REG_TSI_TEMP[i])) + data->have_tsi_temp |= BIT(i); + } + /* Initialize the chip */ nct6775_init_device(data); @@ -4766,6 +4871,18 @@ static int nct6775_probe(struct platform_device *pdev) return PTR_ERR(group); data->groups[num_attr_groups++] = group; + + if (data->have_tsi_temp) { + tsi_temp_tg.templates = nct6775_tsi_temp_template; + tsi_temp_tg.is_visible = nct6775_tsi_temp_is_visible; + tsi_temp_tg.base = fls(data->have_temp) + 1; + group = nct6775_create_attr_group(dev, &tsi_temp_tg, fls(data->have_tsi_temp)); + if (IS_ERR(group)) + return PTR_ERR(group); + + data->groups[num_attr_groups++] = group; + } + data->groups[num_attr_groups++] = &nct6775_group_other; hwmon_dev = devm_hwmon_device_register_with_groups(dev, data->name, @@ -4985,9 +5102,14 @@ static struct platform_device *pdev[2]; static const char * const asus_wmi_boards[] = { "ProArt X570-CREATOR WIFI", + "Pro B550M-C", "Pro WS X570-ACE", "PRIME B360-PLUS", "PRIME B460-PLUS", + "PRIME B550-PLUS", + "PRIME B550M-A", + "PRIME B550M-A (WI-FI)", + "PRIME X570-P", "PRIME X570-PRO", "ROG CROSSHAIR VIII DARK HERO", "ROG CROSSHAIR VIII FORMULA", @@ -4997,10 +5119,22 @@ static const char * const asus_wmi_boards[] = { "ROG STRIX B550-E GAMING", "ROG STRIX B550-F GAMING", "ROG STRIX B550-F GAMING (WI-FI)", + "ROG STRIX B550-F GAMING WIFI II", "ROG STRIX B550-I GAMING", + "ROG STRIX B550-XE GAMING (WI-FI)", + "ROG STRIX X570-E GAMING", "ROG STRIX X570-F GAMING", "ROG STRIX X570-I GAMING", "ROG STRIX Z390-E GAMING", + "ROG STRIX Z390-F GAMING", + "ROG STRIX Z390-H GAMING", + "ROG STRIX Z390-I GAMING", + "ROG STRIX Z490-A GAMING", + "ROG STRIX Z490-E GAMING", + "ROG STRIX Z490-F GAMING", + "ROG STRIX Z490-G GAMING", + "ROG STRIX Z490-G GAMING (WI-FI)", + "ROG STRIX Z490-H GAMING", "ROG STRIX Z490-I GAMING", "TUF GAMING B550M-PLUS", "TUF GAMING B550M-PLUS (WI-FI)", diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h index e0aa8aa46d8c4..ef3a8ecde4dfc 100644 --- a/drivers/hwmon/pmbus/pmbus.h +++ b/drivers/hwmon/pmbus/pmbus.h @@ -319,6 +319,7 @@ enum pmbus_fan_mode { percent = 0, rpm }; /* * STATUS_VOUT, STATUS_INPUT */ +#define PB_VOLTAGE_VIN_OFF BIT(3) #define PB_VOLTAGE_UV_FAULT BIT(4) #define PB_VOLTAGE_UV_WARNING BIT(5) #define PB_VOLTAGE_OV_WARNING BIT(6) diff --git a/drivers/hwmon/pmbus/pmbus_core.c 
b/drivers/hwmon/pmbus/pmbus_core.c index ac2fbee1ba9c0..ca0bfaf2f6911 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -1373,7 +1373,7 @@ static const struct pmbus_limit_attr vin_limit_attrs[] = { .reg = PMBUS_VIN_UV_FAULT_LIMIT, .attr = "lcrit", .alarm = "lcrit_alarm", - .sbit = PB_VOLTAGE_UV_FAULT, + .sbit = PB_VOLTAGE_UV_FAULT | PB_VOLTAGE_VIN_OFF, }, { .reg = PMBUS_VIN_OV_WARN_LIMIT, .attr = "max", @@ -2391,10 +2391,14 @@ static int pmbus_regulator_is_enabled(struct regulator_dev *rdev) { struct device *dev = rdev_get_dev(rdev); struct i2c_client *client = to_i2c_client(dev->parent); + struct pmbus_data *data = i2c_get_clientdata(client); u8 page = rdev_get_id(rdev); int ret; + mutex_lock(&data->update_lock); ret = pmbus_read_byte_data(client, page, PMBUS_OPERATION); + mutex_unlock(&data->update_lock); + if (ret < 0) return ret; @@ -2405,11 +2409,17 @@ static int _pmbus_regulator_on_off(struct regulator_dev *rdev, bool enable) { struct device *dev = rdev_get_dev(rdev); struct i2c_client *client = to_i2c_client(dev->parent); + struct pmbus_data *data = i2c_get_clientdata(client); u8 page = rdev_get_id(rdev); + int ret; - return pmbus_update_byte_data(client, page, PMBUS_OPERATION, - PB_OPERATION_CONTROL_ON, - enable ? PB_OPERATION_CONTROL_ON : 0); + mutex_lock(&data->update_lock); + ret = pmbus_update_byte_data(client, page, PMBUS_OPERATION, + PB_OPERATION_CONTROL_ON, + enable ? PB_OPERATION_CONTROL_ON : 0); + mutex_unlock(&data->update_lock); + + return ret; } static int pmbus_regulator_enable(struct regulator_dev *rdev) diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c index 40cdadad35e52..f85eede6d7663 100644 --- a/drivers/hwmon/sch56xx-common.c +++ b/drivers/hwmon/sch56xx-common.c @@ -422,7 +422,7 @@ void sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision, data->wddev.max_timeout = 255 * 60; watchdog_set_nowayout(&data->wddev, nowayout); if (output_enable & SCH56XX_WDOG_OUTPUT_ENABLE) - set_bit(WDOG_ACTIVE, &data->wddev.status); + set_bit(WDOG_HW_RUNNING, &data->wddev.status); /* Since the watchdog uses a downcounter there is no register to read the BIOS set timeout from (if any was set at all) -> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c index a0640fa5c55bd..57e94424a8d65 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c @@ -367,8 +367,12 @@ static ssize_t mode_store(struct device *dev, mode = ETM_MODE_QELEM(config->mode); /* start by clearing QE bits */ config->cfg &= ~(BIT(13) | BIT(14)); - /* if supported, Q elements with instruction counts are enabled */ - if ((mode & BIT(0)) && (drvdata->q_support & BIT(0))) + /* + * if supported, Q elements with instruction counts are enabled. + * Always set the low bit for any requested mode. Valid combos are + * 0b00, 0b01 and 0b11. 
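+ * The remaining encoding, 0b10, is not a valid QE value, hence the low + * bit is set whenever any Q-element mode is requested.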
+ */ + if (mode && drvdata->q_support) config->cfg |= BIT(13); /* * if supported, Q elements with and without instruction diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c index 098fc34c48293..11850fd8c3b5b 100644 --- a/drivers/hwtracing/coresight/coresight-syscfg.c +++ b/drivers/hwtracing/coresight/coresight-syscfg.c @@ -1049,7 +1049,7 @@ static int cscfg_create_device(void) err = device_register(dev); if (err) - cscfg_dev_release(dev); + put_device(dev); create_dev_exit_unlock: mutex_unlock(&cscfg_mutex); diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index 5149454eef4a5..f72c6576d8a36 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -454,18 +454,20 @@ static int bcm2835_i2c_probe(struct platform_device *pdev) ret = clk_prepare_enable(i2c_dev->bus_clk); if (ret) { dev_err(&pdev->dev, "Couldn't prepare clock"); - return ret; + goto err_put_exclusive_rate; } i2c_dev->irq = platform_get_irq(pdev, 0); - if (i2c_dev->irq < 0) - return i2c_dev->irq; + if (i2c_dev->irq < 0) { + ret = i2c_dev->irq; + goto err_disable_unprepare_clk; + } ret = request_irq(i2c_dev->irq, bcm2835_i2c_isr, IRQF_SHARED, dev_name(&pdev->dev), i2c_dev); if (ret) { dev_err(&pdev->dev, "Could not request IRQ\n"); - return -ENODEV; + goto err_disable_unprepare_clk; } adap = &i2c_dev->adapter; @@ -489,7 +491,16 @@ static int bcm2835_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(adap); if (ret) - free_irq(i2c_dev->irq, i2c_dev); + goto err_free_irq; + + return 0; + +err_free_irq: + free_irq(i2c_dev->irq, i2c_dev); +err_disable_unprepare_clk: + clk_disable_unprepare(i2c_dev->bus_clk); +err_put_exclusive_rate: + clk_rate_exclusive_put(i2c_dev->bus_clk); return ret; } diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c index ef73a42577cc7..07eb819072c4f 100644 --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -465,18 +465,18 @@ static int meson_i2c_probe(struct platform_device *pdev) */ meson_i2c_set_mask(i2c, REG_CTRL, REG_CTRL_START, 0); - ret = i2c_add_adapter(&i2c->adap); - if (ret < 0) { - clk_disable_unprepare(i2c->clk); - return ret; - } - /* Disable filtering */ meson_i2c_set_mask(i2c, REG_SLAVE_ADDR, REG_SLV_SDA_FILTER | REG_SLV_SCL_FILTER, 0); meson_i2c_set_clk_div(i2c, timings.bus_freq_hz); + ret = i2c_add_adapter(&i2c->adap); + if (ret < 0) { + clk_disable_unprepare(i2c->clk); + return ret; + } + return 0; } diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 4e161a4089d85..7728c8460dc0f 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -333,7 +333,6 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) smbus->adapter.owner = THIS_MODULE; snprintf(smbus->adapter.name, sizeof(smbus->adapter.name), "PA Semi SMBus adapter (%s)", dev_name(smbus->dev)); - smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; smbus->adapter.algo = &smbus_algorithm; smbus->adapter.algo_data = smbus; diff --git a/drivers/i2c/busses/i2c-pasemi-pci.c b/drivers/i2c/busses/i2c-pasemi-pci.c index 1ab1f28744fb2..cfc89e04eb94c 100644 --- a/drivers/i2c/busses/i2c-pasemi-pci.c +++ b/drivers/i2c/busses/i2c-pasemi-pci.c @@ -56,6 +56,7 @@ static int pasemi_smb_pci_probe(struct pci_dev *dev, if (!smbus->ioaddr) return -EBUSY; + smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD; error = pasemi_i2c_common_probe(smbus); if (error) return error; diff --git 
a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index eb789cfb99739..ffefe3c482e9c 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -734,7 +734,6 @@ static const struct i2c_adapter_quirks xiic_quirks = { static const struct i2c_adapter xiic_adapter = { .owner = THIS_MODULE, - .name = DRIVER_NAME, .class = I2C_CLASS_DEPRECATED, .algo = &xiic_algorithm, .quirks = &xiic_quirks, @@ -771,6 +770,8 @@ static int xiic_i2c_probe(struct platform_device *pdev) i2c_set_adapdata(&i2c->adap, i2c); i2c->adap.dev.parent = &pdev->dev; i2c->adap.dev.of_node = pdev->dev.of_node; + snprintf(i2c->adap.name, sizeof(i2c->adap.name), + DRIVER_NAME " %s", pdev->name); mutex_init(&i2c->lock); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 2c59dd748a49f..121bbad56cfac 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1424,7 +1424,7 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr) if (irq <= 0) return -ENXIO; - generic_handle_irq(irq); + generic_dispatch_irq(irq); return 0; } diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 5365199a31f41..f7a7405d4350a 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -261,7 +261,7 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) err = device_create_file(&pdev->dev, &dev_attr_available_masters); if (err) - goto err_rollback; + goto err_rollback_activation; err = device_create_file(&pdev->dev, &dev_attr_current_master); if (err) @@ -271,8 +271,9 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) err_rollback_available: device_remove_file(&pdev->dev, &dev_attr_available_masters); -err_rollback: +err_rollback_activation: i2c_demux_deactivate_master(priv); +err_rollback: for (j = 0; j < i; j++) { of_node_put(priv->chan[j].parent_np); of_changeset_destroy(&priv->chan[j].chgset); diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 64b82b4503ada..a21fdb015c6c0 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -176,6 +176,7 @@ static const struct mma8452_event_regs trans_ev_regs = { * @enabled_events: event flags enabled and handled by this driver */ struct mma_chip_info { + const char *name; u8 chip_id; const struct iio_chan_spec *channels; int num_channels; @@ -379,8 +380,8 @@ static ssize_t mma8452_show_scale_avail(struct device *dev, struct device_attribute *attr, char *buf) { - struct mma8452_data *data = iio_priv(i2c_get_clientdata( - to_i2c_client(dev))); + struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct mma8452_data *data = iio_priv(indio_dev); return mma8452_show_int_plus_micros(buf, data->chip_info->mma_scales, ARRAY_SIZE(data->chip_info->mma_scales)); @@ -1301,6 +1302,7 @@ enum { static const struct mma_chip_info mma_chip_info_table[] = { [mma8451] = { + .name = "mma8451", .chip_id = MMA8451_DEVICE_ID, .channels = mma8451_channels, .num_channels = ARRAY_SIZE(mma8451_channels), @@ -1325,6 +1327,7 @@ static const struct mma_chip_info mma_chip_info_table[] = { MMA8452_INT_FF_MT, }, [mma8452] = { + .name = "mma8452", .chip_id = MMA8452_DEVICE_ID, .channels = mma8452_channels, .num_channels = ARRAY_SIZE(mma8452_channels), @@ -1341,6 +1344,7 @@ static const struct mma_chip_info mma_chip_info_table[] = { MMA8452_INT_FF_MT, }, [mma8453] = { + .name = "mma8453", .chip_id = MMA8453_DEVICE_ID, .channels = mma8453_channels, .num_channels = 
ARRAY_SIZE(mma8453_channels), @@ -1357,6 +1361,7 @@ static const struct mma_chip_info mma_chip_info_table[] = { MMA8452_INT_FF_MT, }, [mma8652] = { + .name = "mma8652", .chip_id = MMA8652_DEVICE_ID, .channels = mma8652_channels, .num_channels = ARRAY_SIZE(mma8652_channels), @@ -1366,6 +1371,7 @@ static const struct mma_chip_info mma_chip_info_table[] = { .enabled_events = MMA8452_INT_FF_MT, }, [mma8653] = { + .name = "mma8653", .chip_id = MMA8653_DEVICE_ID, .channels = mma8653_channels, .num_channels = ARRAY_SIZE(mma8653_channels), @@ -1380,6 +1386,7 @@ static const struct mma_chip_info mma_chip_info_table[] = { .enabled_events = MMA8452_INT_FF_MT, }, [fxls8471] = { + .name = "fxls8471", .chip_id = FXLS8471_DEVICE_ID, .channels = mma8451_channels, .num_channels = ARRAY_SIZE(mma8451_channels), @@ -1522,13 +1529,6 @@ static int mma8452_probe(struct i2c_client *client, struct mma8452_data *data; struct iio_dev *indio_dev; int ret; - const struct of_device_id *match; - - match = of_match_device(mma8452_dt_ids, &client->dev); - if (!match) { - dev_err(&client->dev, "unknown device model\n"); - return -ENODEV; - } indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data)); if (!indio_dev) @@ -1537,7 +1537,14 @@ static int mma8452_probe(struct i2c_client *client, data = iio_priv(indio_dev); data->client = client; mutex_init(&data->lock); - data->chip_info = match->data; + + data->chip_info = device_get_match_data(&client->dev); + if (!data->chip_info && id) { + data->chip_info = &mma_chip_info_table[id->driver_data]; + } else { + dev_err(&client->dev, "unknown device model\n"); + return -ENODEV; + } data->vdd_reg = devm_regulator_get(&client->dev, "vdd"); if (IS_ERR(data->vdd_reg)) @@ -1581,11 +1588,11 @@ static int mma8452_probe(struct i2c_client *client, } dev_info(&client->dev, "registering %s accelerometer; ID 0x%x\n", - match->compatible, data->chip_info->chip_id); + data->chip_info->name, data->chip_info->chip_id); i2c_set_clientdata(client, indio_dev); indio_dev->info = &mma8452_info; - indio_dev->name = id->name; + indio_dev->name = data->chip_info->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = data->chip_info->channels; indio_dev->num_channels = data->chip_info->num_channels; @@ -1810,7 +1817,7 @@ MODULE_DEVICE_TABLE(i2c, mma8452_id); static struct i2c_driver mma8452_driver = { .driver = { .name = "mma8452", - .of_match_table = of_match_ptr(mma8452_dt_ids), + .of_match_table = mma8452_dt_ids, .pm = &mma8452_pm_ops, }, .probe = mma8452_probe, diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index e939b84cbb561..0793d2474cdcf 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -539,7 +539,9 @@ static int aspeed_adc_probe(struct platform_device *pdev) data->clk_scaler = devm_clk_hw_register_divider( &pdev->dev, clk_name, clk_parent_name, scaler_flags, data->base + ASPEED_REG_CLOCK_CONTROL, 0, - data->model_data->scaler_bit_width, 0, &data->clk_lock); + data->model_data->scaler_bit_width, + data->model_data->need_prescaler ? 
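/*
 * Editor's note on the mma8452 probe hunk above: the usual shape of a
 * firmware-node lookup with an i2c_device_id fallback is nested, so that
 * a successful device_get_match_data() never reaches the error branch;
 * as quoted, the flat if/else would return -ENODEV precisely when match
 * data was found.  A minimal sketch (dev_err_probe() used for brevity):
 *
 *	data->chip_info = device_get_match_data(&client->dev);
 *	if (!data->chip_info) {
 *		if (!id)
 *			return dev_err_probe(&client->dev, -ENODEV,
 *					     "unknown device model\n");
 *		data->chip_info = &mma_chip_info_table[id->driver_data];
 *	}
 */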
CLK_DIVIDER_ONE_BASED : 0, + &data->clk_lock); if (IS_ERR(data->clk_scaler)) return PTR_ERR(data->clk_scaler); diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c index afdb59e0b5267..d0223e39d59af 100644 --- a/drivers/iio/adc/twl6030-gpadc.c +++ b/drivers/iio/adc/twl6030-gpadc.c @@ -911,6 +911,8 @@ static int twl6030_gpadc_probe(struct platform_device *pdev) ret = devm_request_threaded_irq(dev, irq, NULL, twl6030_gpadc_irq_handler, IRQF_ONESHOT, "twl6030_gpadc", indio_dev); + if (ret) + return ret; ret = twl6030_gpadc_enable_irq(TWL6030_GPADC_RT_SW1_EOC_MASK); if (ret < 0) { diff --git a/drivers/iio/adc/xilinx-ams.c b/drivers/iio/adc/xilinx-ams.c index 8343c5f74121e..7bf097fa10cb7 100644 --- a/drivers/iio/adc/xilinx-ams.c +++ b/drivers/iio/adc/xilinx-ams.c @@ -91,8 +91,8 @@ #define AMS_CONF1_SEQ_MASK GENMASK(15, 12) #define AMS_CONF1_SEQ_DEFAULT FIELD_PREP(AMS_CONF1_SEQ_MASK, 0) -#define AMS_CONF1_SEQ_CONTINUOUS FIELD_PREP(AMS_CONF1_SEQ_MASK, 1) -#define AMS_CONF1_SEQ_SINGLE_CHANNEL FIELD_PREP(AMS_CONF1_SEQ_MASK, 2) +#define AMS_CONF1_SEQ_CONTINUOUS FIELD_PREP(AMS_CONF1_SEQ_MASK, 2) +#define AMS_CONF1_SEQ_SINGLE_CHANNEL FIELD_PREP(AMS_CONF1_SEQ_MASK, 3) #define AMS_REG_SEQ0_MASK GENMASK(15, 0) #define AMS_REG_SEQ2_MASK GENMASK(21, 16) @@ -530,14 +530,18 @@ static int ams_enable_single_channel(struct ams *ams, unsigned int offset) return -EINVAL; } - /* set single channel, sequencer off mode */ + /* put sysmon in a soft reset to change the sequence */ ams_ps_update_reg(ams, AMS_REG_CONFIG1, AMS_CONF1_SEQ_MASK, - AMS_CONF1_SEQ_SINGLE_CHANNEL); + AMS_CONF1_SEQ_DEFAULT); /* write the channel number */ ams_ps_update_reg(ams, AMS_REG_CONFIG0, AMS_CONF0_CHANNEL_NUM_MASK, channel_num); + /* set single channel, sequencer off mode */ + ams_ps_update_reg(ams, AMS_REG_CONFIG1, AMS_CONF1_SEQ_MASK, + AMS_CONF1_SEQ_SINGLE_CHANNEL); + return 0; } @@ -551,6 +555,8 @@ static int ams_read_vcc_reg(struct ams *ams, unsigned int offset, u32 *data) if (ret) return ret; + /* clear end-of-conversion flag, wait for next conversion to complete */ + writel(expect, ams->base + AMS_ISR_1); ret = readl_poll_timeout(ams->base + AMS_ISR_1, reg, (reg & expect), AMS_INIT_POLL_TIME_US, AMS_INIT_TIMEOUT_US); if (ret) @@ -1224,6 +1230,7 @@ static int ams_init_module(struct iio_dev *indio_dev, /* add PS channels to iio device channels */ memcpy(channels, ams_ps_channels, sizeof(ams_ps_channels)); + num_channels = ARRAY_SIZE(ams_ps_channels); } else if (fwnode_property_match_string(fwnode, "compatible", "xlnx,zynqmp-ams-pl") == 0) { ams->pl_base = fwnode_iomap(fwnode, 0); diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c index 774eb3044edd8..271d73e420c42 100644 --- a/drivers/iio/afe/iio-rescale.c +++ b/drivers/iio/afe/iio-rescale.c @@ -39,7 +39,7 @@ static int rescale_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct rescale *rescale = iio_priv(indio_dev); - unsigned long long tmp; + s64 tmp; int ret; switch (mask) { @@ -77,10 +77,10 @@ static int rescale_read_raw(struct iio_dev *indio_dev, *val2 = rescale->denominator; return IIO_VAL_FRACTIONAL; case IIO_VAL_FRACTIONAL_LOG2: - tmp = *val * 1000000000LL; - do_div(tmp, rescale->denominator); + tmp = (s64)*val * 1000000000LL; + tmp = div_s64(tmp, rescale->denominator); tmp *= rescale->numerator; - do_div(tmp, 1000000000LL); + tmp = div_s64(tmp, 1000000000LL); *val = tmp; return ret; default: diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 
93f0c6bce502c..b1d8d5a66f01f 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -1633,7 +1633,7 @@ st_lsm6dsx_sysfs_sampling_frequency_avail(struct device *dev, struct device_attribute *attr, char *buf) { - struct st_lsm6dsx_sensor *sensor = iio_priv(dev_get_drvdata(dev)); + struct st_lsm6dsx_sensor *sensor = iio_priv(dev_to_iio_dev(dev)); const struct st_lsm6dsx_odr_table_entry *odr_table; int i, len = 0; @@ -1651,7 +1651,7 @@ static ssize_t st_lsm6dsx_sysfs_scale_avail(struct device *dev, struct device_attribute *attr, char *buf) { - struct st_lsm6dsx_sensor *sensor = iio_priv(dev_get_drvdata(dev)); + struct st_lsm6dsx_sensor *sensor = iio_priv(dev_to_iio_dev(dev)); const struct st_lsm6dsx_fs_table_entry *fs_table; struct st_lsm6dsx_hw *hw = sensor->hw; int i, len = 0; diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 0222885b334c1..df74765d33dcb 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -595,28 +595,50 @@ EXPORT_SYMBOL_GPL(iio_read_channel_average_raw); static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan, int raw, int *processed, unsigned int scale) { - int scale_type, scale_val, scale_val2, offset; + int scale_type, scale_val, scale_val2; + int offset_type, offset_val, offset_val2; s64 raw64 = raw; - int ret; - ret = iio_channel_read(chan, &offset, NULL, IIO_CHAN_INFO_OFFSET); - if (ret >= 0) - raw64 += offset; + offset_type = iio_channel_read(chan, &offset_val, &offset_val2, + IIO_CHAN_INFO_OFFSET); + if (offset_type >= 0) { + switch (offset_type) { + case IIO_VAL_INT: + break; + case IIO_VAL_INT_PLUS_MICRO: + case IIO_VAL_INT_PLUS_NANO: + /* + * Both IIO_VAL_INT_PLUS_MICRO and IIO_VAL_INT_PLUS_NANO + * implicitly truncate the offset to its integer form. + */ + break; + case IIO_VAL_FRACTIONAL: + offset_val /= offset_val2; + break; + case IIO_VAL_FRACTIONAL_LOG2: + offset_val >>= offset_val2; + break; + default: + return -EINVAL; + } + + raw64 += offset_val; + } scale_type = iio_channel_read(chan, &scale_val, &scale_val2, IIO_CHAN_INFO_SCALE); if (scale_type < 0) { /* - * Just pass raw values as processed if no scaling is - * available. + * If no channel scaling is available, apply consumer scale to + * raw value and return.
*/ - *processed = raw; + *processed = raw * scale; return 0; } switch (scale_type) { case IIO_VAL_INT: - *processed = raw64 * scale_val; + *processed = raw64 * scale_val * scale; break; case IIO_VAL_INT_PLUS_MICRO: if (scale_val2 < 0) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 50c53409ceb61..fabca5e51e3d4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2642,7 +2642,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout) { struct rdma_id_private *id_priv; - if (id->qp_type != IB_QPT_RC) + if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI) return -EINVAL; id_priv = container_of(id, struct rdma_id_private, id); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index f5aacaf7fb8ef..ca24ce34da766 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1951,9 +1951,10 @@ static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], u32 port) { struct rdma_hw_stats *stats; - int rem, i, index, ret = 0; struct nlattr *entry_attr; unsigned long *target; + int rem, i, ret = 0; + u32 index; stats = ib_get_hw_stats_port(device, port); if (!stats) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c18634bec2126..e821dc94a43ed 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2153,6 +2153,7 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return mr; mr->device = pd->device; + mr->type = IB_MR_TYPE_USER; mr->pd = pd; mr->dm = NULL; atomic_inc(&pd->usecnt); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index dc9211f3a0098..99d0743133cac 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1397,8 +1397,7 @@ static int query_port(struct rvt_dev_info *rdi, u32 port_num, 4096 : hfi1_max_mtu), IB_MTU_4096); props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); - props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? 
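/*
 * Editor's sketch for the inkern.c hunk above, as a hypothetical helper
 * that is not in the patch: every IIO_CHAN_INFO_OFFSET encoding is
 * reduced to a plain integer before it is added to the raw sample.
 *
 *	static int iio_offset_as_int(int type, int val, int val2)
 *	{
 *		switch (type) {
 *		case IIO_VAL_INT:
 *			return val;
 *		case IIO_VAL_INT_PLUS_MICRO:
 *		case IIO_VAL_INT_PLUS_NANO:
 *			return val;		(fraction deliberately dropped)
 *		case IIO_VAL_FRACTIONAL:
 *			return val / val2;
 *		case IIO_VAL_FRACTIONAL_LOG2:
 *			return val >> val2;
 *		default:
 *			return -EINVAL;		(reject unknown encodings)
 *		}
 *	}
 */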
hfi1_max_mtu : - ib_mtu_enum_to_int(props->max_mtu); + props->phys_mtu = hfi1_max_mtu; return 0; } diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 3141a9c85de5a..e7554b6043e4b 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -433,7 +433,7 @@ enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_c cqp = qp->dev->cqp; if (qp->qp_uk.qp_id < cqp->dev->hw_attrs.min_hw_qp_id || - qp->qp_uk.qp_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt - 1)) + qp->qp_uk.qp_id >= (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt)) return IRDMA_ERR_INVALID_QP_ID; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); @@ -2512,10 +2512,10 @@ static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq, enum irdma_status_code ret_code = 0; cqp = cq->dev->cqp; - if (cq->cq_uk.cq_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt - 1)) + if (cq->cq_uk.cq_id >= (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt)) return IRDMA_ERR_INVALID_CQ_ID; - if (cq->ceq_id > (cq->dev->hmc_fpm_misc.max_ceqs - 1)) + if (cq->ceq_id >= (cq->dev->hmc_fpm_misc.max_ceqs)) return IRDMA_ERR_INVALID_CEQ_ID; ceq = cq->dev->ceq[cq->ceq_id]; @@ -3617,7 +3617,7 @@ enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, info->elem_cnt > info->dev->hw_attrs.max_hw_ceq_size) return IRDMA_ERR_INVALID_SIZE; - if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1)) + if (info->ceq_id >= (info->dev->hmc_fpm_misc.max_ceqs)) return IRDMA_ERR_INVALID_CEQ_ID; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; @@ -4166,7 +4166,7 @@ enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *cq, info->num_elem > info->dev->hw_attrs.uk_attrs.max_hw_cq_size) return IRDMA_ERR_INVALID_SIZE; - if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1)) + if (info->ceq_id >= (info->dev->hmc_fpm_misc.max_ceqs )) return IRDMA_ERR_INVALID_CEQ_ID; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 89234d04cc652..e46e3240cc9fd 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1608,7 +1608,7 @@ static enum irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf) info.fpm_commit_buf = mem.va; info.bar0 = rf->hw.hw_addr; - info.hmc_fn_id = PCI_FUNC(rf->pcidev->devfn); + info.hmc_fn_id = rf->pf_id; info.hw = &rf->hw; status = irdma_sc_dev_init(rf->rdma_ver, &rf->sc_dev, &info); if (status) diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c index 43e962b97d6a3..0886783db647c 100644 --- a/drivers/infiniband/hw/irdma/i40iw_if.c +++ b/drivers/infiniband/hw/irdma/i40iw_if.c @@ -77,6 +77,7 @@ static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info rf->rdma_ver = IRDMA_GEN_1; rf->gen_ops.request_reset = i40iw_request_reset; rf->pcidev = cdev_info->pcidev; + rf->pf_id = cdev_info->fid; rf->hw.hw_addr = cdev_info->hw_addr; rf->cdev = cdev_info; rf->msix_count = cdev_info->msix_count; diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 9fab29039f1c0..5e8e8860686dc 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -226,6 +226,7 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf rf->hw.hw_addr = pf->hw.hw_addr; rf->pcidev = pf->pdev; rf->msix_count = pf->num_rdma_msix; + rf->pf_id = 
pf->hw.pf_id; rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector]; rf->default_vsi.vsi_idx = vsi->vsi_num; rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index cb218cab79ac1..fb7faa85e4c9d 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -257,6 +257,7 @@ struct irdma_pci_f { u8 *mem_rsrc; u8 rdma_ver; u8 rst_to; + u8 pf_id; enum irdma_protocol_used protocol_used; u32 sd_type; u32 msix_count; diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 398736d8c78a4..e81b74a518dd0 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -150,31 +150,35 @@ int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct in_ifaddr *ifa = ptr; - struct net_device *netdev = ifa->ifa_dev->dev; + struct net_device *real_dev, *netdev = ifa->ifa_dev->dev; struct irdma_device *iwdev; struct ib_device *ibdev; u32 local_ipaddr; - ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA); + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); if (!ibdev) return NOTIFY_DONE; iwdev = to_iwdev(ibdev); local_ipaddr = ntohl(ifa->ifa_address); ibdev_dbg(&iwdev->ibdev, - "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", netdev, - event, &local_ipaddr, netdev->dev_addr); + "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", real_dev, + event, &local_ipaddr, real_dev->dev_addr); switch (event) { case NETDEV_DOWN: - irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr, + irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr, &local_ipaddr, true, IRDMA_ARP_DELETE); - irdma_if_notify(iwdev, netdev, &local_ipaddr, true, false); + irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, false); irdma_gid_change_event(&iwdev->ibdev); break; case NETDEV_UP: case NETDEV_CHANGEADDR: - irdma_add_arp(iwdev->rf, &local_ipaddr, true, netdev->dev_addr); - irdma_if_notify(iwdev, netdev, &local_ipaddr, true, true); + irdma_add_arp(iwdev->rf, &local_ipaddr, true, real_dev->dev_addr); + irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, true); irdma_gid_change_event(&iwdev->ibdev); break; default: @@ -196,32 +200,36 @@ int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; - struct net_device *netdev = ifa->idev->dev; + struct net_device *real_dev, *netdev = ifa->idev->dev; struct irdma_device *iwdev; struct ib_device *ibdev; u32 local_ipaddr6[4]; - ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA); + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); if (!ibdev) return NOTIFY_DONE; iwdev = to_iwdev(ibdev); irdma_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32); ibdev_dbg(&iwdev->ibdev, - "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", netdev, - event, local_ipaddr6, netdev->dev_addr); + "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", real_dev, + event, local_ipaddr6, real_dev->dev_addr); switch (event) { case NETDEV_DOWN: - irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr, + irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr, local_ipaddr6, false, IRDMA_ARP_DELETE); - irdma_if_notify(iwdev, netdev, local_ipaddr6, false, false); + irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, false); 
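/*
 * Editor's sketch: the lookup all three irdma notifier hunks share.  An
 * address event may arrive on a VLAN upper device while the ib_device is
 * registered against the underlying real netdev, so resolve that first;
 * rdma_vlan_dev_real_dev() returns NULL for a non-VLAN netdev
 * (hypothetical helper name, not in the patch):
 *
 *	static struct ib_device *irdma_ibdev_of(struct net_device *netdev)
 *	{
 *		struct net_device *real_dev = rdma_vlan_dev_real_dev(netdev);
 *
 *		if (!real_dev)
 *			real_dev = netdev;
 *		return ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
 *	}
 */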
irdma_gid_change_event(&iwdev->ibdev); break; case NETDEV_UP: case NETDEV_CHANGEADDR: irdma_add_arp(iwdev->rf, local_ipaddr6, false, - netdev->dev_addr); - irdma_if_notify(iwdev, netdev, local_ipaddr6, false, true); + real_dev->dev_addr); + irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, true); irdma_gid_change_event(&iwdev->ibdev); break; default: @@ -243,14 +251,18 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct neighbour *neigh = ptr; + struct net_device *real_dev, *netdev = (struct net_device *)neigh->dev; struct irdma_device *iwdev; struct ib_device *ibdev; __be32 *p; u32 local_ipaddr[4] = {}; bool ipv4 = true; - ibdev = ib_device_get_by_netdev((struct net_device *)neigh->dev, - RDMA_DRIVER_IRDMA); + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); if (!ibdev) return NOTIFY_DONE; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 460e757d3fe61..1bf6404ec8340 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2509,7 +2509,7 @@ static int irdma_dealloc_mw(struct ib_mw *ibmw) cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); - info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff; + info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = ibmw->rkey >> IRDMA_CQPSQ_STAG_IDX_S; info->mr = false; cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG; @@ -3021,7 +3021,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); - info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff; + info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S; info->mr = true; if (iwpbl->pbl_allocated) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 08b7f6bc56c37..15c0884d1f498 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1886,8 +1886,10 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, key_level2, obj_event, GFP_KERNEL); - if (err) + if (err) { + kfree(obj_event); return err; + } INIT_LIST_HEAD(&obj_event->obj_sub_list); } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 157d862fb8642..2910d78333130 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -585,6 +585,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, ent = &cache->ent[entry]; spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { + queue_adjust_cache_locked(ent); + ent->miss++; spin_unlock_irq(&ent->lock); mr = create_cache_mr(ent); if (IS_ERR(mr)) diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 38c7b6fb39d70..360a567159fe5 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -99,11 +99,14 @@ void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr) av->network_type = type; } -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp) { struct rxe_ah *ah; u32 ah_num; + if (ahp) + *ahp = NULL; + if (!pkt || !pkt->qp) return NULL; @@ -117,10 +120,22 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) if (ah_num) { /* only new user provider or kernel client */ ah = 
rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); - if (!ah || ah->ah_num != ah_num || rxe_ah_pd(ah) != pkt->qp->pd) { + if (!ah) { pr_warn("Unable to find AH matching ah_num\n"); return NULL; } + + if (rxe_ah_pd(ah) != pkt->qp->pd) { + pr_warn("PDs don't match for AH and QP\n"); + rxe_drop_ref(ah); + return NULL; + } + + if (ahp) + *ahp = ah; + else + rxe_drop_ref(ah); + return &ah->av; } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index b1e174afb1d49..b92bb7a152905 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -19,7 +19,7 @@ void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp); /* rxe_cq.c */ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, @@ -102,7 +102,8 @@ void rxe_mw_cleanup(struct rxe_pool_elem *arg); /* rxe_net.c */ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt); -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb); +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb); int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, struct sk_buff *skb); const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index be72bdbfb4ba7..580cfd742dd2f 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -289,13 +289,13 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } -static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); - struct rxe_av *av = rxe_get_av(pkt); struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; @@ -315,11 +315,11 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; - struct rxe_av *av = rxe_get_av(pkt); struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; @@ -340,16 +340,17 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb) +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { int err = 0; if (skb->protocol == htons(ETH_P_IP)) - err = prepare4(pkt, skb); + err = prepare4(av, pkt, skb); else if (skb->protocol == htons(ETH_P_IPV6)) - err = prepare6(pkt, skb); + err = prepare6(av, pkt, skb); - if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac)) + if (ether_addr_equal(skb->dev->dev_addr, av->dmac)) pkt->mask |= RXE_LOOPBACK_MASK; return err; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 5eb89052dd668..204e31bbd61f7 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ 
b/drivers/infiniband/sw/rxe/rxe_req.c @@ -358,14 +358,14 @@ static inline int get_mtu(struct rxe_qp *qp) } static struct sk_buff *init_req_packet(struct rxe_qp *qp, + struct rxe_av *av, struct rxe_send_wqe *wqe, - int opcode, int payload, + int opcode, u32 payload, struct rxe_pkt_info *pkt) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct sk_buff *skb; struct rxe_send_wr *ibwr = &wqe->wr; - struct rxe_av *av; int pad = (-payload) & 0x3; int paylen; int solicited; @@ -374,21 +374,9 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, /* length from start of bth to end of icrc */ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; - - /* pkt->hdr, port_num and mask are initialized in ifc layer */ - pkt->rxe = rxe; - pkt->opcode = opcode; - pkt->qp = qp; - pkt->psn = qp->req.psn; - pkt->mask = rxe_opcode[opcode].mask; - pkt->paylen = paylen; - pkt->wqe = wqe; + pkt->paylen = paylen; /* init skb */ - av = rxe_get_av(pkt); - if (!av) - return NULL; - skb = rxe_init_packet(rxe, av, paylen, pkt); if (unlikely(!skb)) return NULL; @@ -447,13 +435,13 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, return skb; } -static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, struct sk_buff *skb, - int paylen) +static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, + struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 paylen) { int err; - err = rxe_prepare(pkt, skb); + err = rxe_prepare(av, pkt, skb); if (err) return err; @@ -497,7 +485,7 @@ static void update_wqe_state(struct rxe_qp *qp, static void update_wqe_psn(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, - int payload) + u32 payload) { /* number of packets left to send including current one */ int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; @@ -540,7 +528,7 @@ static void rollback_state(struct rxe_send_wqe *wqe, } static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, int payload) + struct rxe_pkt_info *pkt, u32 payload) { qp->req.opcode = pkt->opcode; @@ -608,17 +596,20 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) int rxe_requester(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_pkt_info pkt; struct sk_buff *skb; struct rxe_send_wqe *wqe; enum rxe_hdr_mask mask; - int payload; + u32 payload; int mtu; int opcode; int ret; struct rxe_send_wqe rollback_wqe; u32 rollback_psn; struct rxe_queue *q = qp->sq.queue; + struct rxe_ah *ah; + struct rxe_av *av; rxe_add_ref(qp); @@ -705,14 +696,28 @@ int rxe_requester(void *arg) payload = mtu; } - skb = init_req_packet(qp, wqe, opcode, payload, &pkt); + pkt.rxe = rxe; + pkt.opcode = opcode; + pkt.qp = qp; + pkt.psn = qp->req.psn; + pkt.mask = rxe_opcode[opcode].mask; + pkt.wqe = wqe; + + av = rxe_get_av(&pkt, &ah); + if (unlikely(!av)) { + pr_err("qp#%d Failed no address vector\n", qp_num(qp)); + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err_drop_ah; + } + + skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt); if (unlikely(!skb)) { pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); wqe->status = IB_WC_LOC_QP_OP_ERR; - goto err; + goto err_drop_ah; } - ret = finish_packet(qp, wqe, &pkt, skb, payload); + ret = finish_packet(qp, av, wqe, &pkt, skb, payload); if (unlikely(ret)) { pr_debug("qp#%d Error during finish packet\n", qp_num(qp)); if (ret == -EFAULT) @@ -720,9 +725,12 @@ int rxe_requester(void *arg) 
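/*
 * Editor's note on the rxe hunks above: rxe_get_av(pkt, &ahp) now returns
 * the AV together with a counted reference on the AH when one is in use,
 * and rxe_requester() owns that reference until it is dropped exactly
 * once on every exit path:
 *
 *	av = rxe_get_av(&pkt, &ah);	(may take a reference on ah)
 *	if (unlikely(!av))
 *		goto err_drop_ah;	(ah is NULL here, the drop is a no-op)
 *	...
 *	if (ah)
 *		rxe_drop_ref(ah);	(success and error paths both drop it)
 */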
else wqe->status = IB_WC_LOC_QP_OP_ERR; kfree_skb(skb); - goto err; + goto err_drop_ah; } + if (ah) + rxe_drop_ref(ah); + /* * To prevent a race on wqe access between requester and completer, * wqe members state and psn need to be set before calling @@ -751,6 +759,9 @@ int rxe_requester(void *arg) goto next_wqe; +err_drop_ah: + if (ah) + rxe_drop_ref(ah); err: wqe->state = wqe_state_error; __rxe_do_task(&qp->comp.task); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index e8f435fa6e4d7..192cb9a096a14 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -632,7 +632,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, if (ack->mask & RXE_ATMACK_MASK) atmack_set_orig(ack, qp->resp.atomic_orig); - err = rxe_prepare(ack, skb); + err = rxe_prepare(&qp->pri_av, ack, skb); if (err) { kfree_skb(skb); return NULL; @@ -814,6 +814,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) return RESPST_ERR_INVALIDATE_RKEY; } + if (pkt->mask & RXE_END_MASK) + /* We successfully processed this new request. */ + qp->resp.msn++; + /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.ack_psn = qp->resp.psn; @@ -821,11 +825,9 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; - if (pkt->mask & RXE_COMP_MASK) { - /* We successfully processed this new request. */ - qp->resp.msn++; + if (pkt->mask & RXE_COMP_MASK) return RESPST_COMPLETE; - } else if (qp_type(qp) == IB_QPT_RC) + else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; diff --git a/drivers/input/input.c b/drivers/input/input.c index c3139bc2aa0db..ccaeb24263854 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -2285,12 +2285,6 @@ int input_register_device(struct input_dev *dev) /* KEY_RESERVED is not supposed to be transmitted to userspace. */ __clear_bit(KEY_RESERVED, dev->keybit); - /* Buttonpads should not map BTN_RIGHT and/or BTN_MIDDLE. */ - if (test_bit(INPUT_PROP_BUTTONPAD, dev->propbit)) { - __clear_bit(BTN_RIGHT, dev->keybit); - __clear_bit(BTN_MIDDLE, dev->keybit); - } - /* Make sure that bitmasks not mentioned in dev->evbit are clean. 
*/ input_cleanse_bitmasks(dev); diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b28c9435b898d..170e0f33040e8 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -95,10 +95,11 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) cached_iova = to_iova(iovad->cached32_node); if (free == cached_iova || (free->pfn_hi < iovad->dma_32bit_pfn && - free->pfn_lo >= cached_iova->pfn_lo)) { + free->pfn_lo >= cached_iova->pfn_lo)) iovad->cached32_node = rb_next(&free->node); + + if (free->pfn_lo < iovad->dma_32bit_pfn) iovad->max32_alloc_size = iovad->dma_32bit_pfn; - } cached_iova = to_iova(iovad->cached_node); if (free->pfn_lo >= cached_iova->pfn_lo) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index ca752bdc710f6..61bd9a3004ede 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1006,7 +1006,9 @@ static int ipmmu_probe(struct platform_device *pdev) bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); mmu->features = of_device_get_match_data(&pdev->dev); memset(mmu->utlb_ctx, IPMMU_CTX_INVALID, mmu->features->num_utlbs); - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; /* Map I/O memory and request IRQ. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 25b834104790c..5971a11686662 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -562,22 +562,52 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct mtk_iommu_data *data; + struct device_link *link; + struct device *larbdev; + unsigned int larbid, larbidx, i; if (!fwspec || fwspec->ops != &mtk_iommu_ops) return ERR_PTR(-ENODEV); /* Not an iommu client device */ data = dev_iommu_priv_get(dev); + /* + * Link the consumer device with the smi-larb device (supplier). + * Each device connects to exactly one larb, which is an independent + * HW block; all the ports of a device must sit behind the same larb. + */ + larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); + for (i = 1; i < fwspec->num_ids; i++) { + larbidx = MTK_M4U_TO_LARB(fwspec->ids[i]); + if (larbid != larbidx) { + dev_err(dev, "Can only use one larb. 
Fail@larb%d-%d.\n", + larbid, larbidx); + return ERR_PTR(-EINVAL); + } + } + larbdev = data->larb_imu[larbid].dev; + link = device_link_add(dev, larbdev, + DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS); + if (!link) + dev_err(dev, "Unable to link %s\n", dev_name(larbdev)); return &data->iommu; } static void mtk_iommu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct mtk_iommu_data *data; + struct device *larbdev; + unsigned int larbid; if (!fwspec || fwspec->ops != &mtk_iommu_ops) return; + data = dev_iommu_priv_get(dev); + larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); + larbdev = data->larb_imu[larbid].dev; + device_link_remove(dev, larbdev); + iommu_fwspec_free(dev); } @@ -848,7 +878,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) plarbdev = of_find_device_by_node(larbnode); if (!plarbdev) { of_node_put(larbnode); - return -EPROBE_DEFER; + return -ENODEV; } data->larb_imu[id].dev = &plarbdev->dev; diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index be22fcf988cee..bc7ee90b9373d 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -423,7 +423,18 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct of_phandle_args iommu_spec; struct mtk_iommu_data *data; - int err, idx = 0; + int err, idx = 0, larbid, larbidx; + struct device_link *link; + struct device *larbdev; + + /* + * In the deferred probe case, free the existing fwspec; it is + * always re-initialized internally below. + */ + if (fwspec) { + iommu_fwspec_free(dev); + fwspec = dev_iommu_fwspec_get(dev); + } while (!of_parse_phandle_with_args(dev->of_node, "iommus", "#iommu-cells", @@ -444,6 +455,23 @@ static struct iommu_device *mtk_iommu_probe_device(struct device *dev) data = dev_iommu_priv_get(dev); + /* Link the consumer device with the smi-larb device (supplier) */ + larbid = mt2701_m4u_to_larb(fwspec->ids[0]); + for (idx = 1; idx < fwspec->num_ids; idx++) { + larbidx = mt2701_m4u_to_larb(fwspec->ids[idx]); + if (larbid != larbidx) { + dev_err(dev, "Can only use one larb. 
Fail@larb%d-%d.\n", + larbid, larbidx); + return ERR_PTR(-EINVAL); + } + } + + larbdev = data->larb_imu[larbid].dev; + link = device_link_add(dev, larbdev, + DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS); + if (!link) + dev_err(dev, "Unable to link %s\n", dev_name(larbdev)); + return &data->iommu; } @@ -464,10 +492,18 @@ static void mtk_iommu_probe_finalize(struct device *dev) static void mtk_iommu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct mtk_iommu_data *data; + struct device *larbdev; + unsigned int larbid; if (!fwspec || fwspec->ops != &mtk_iommu_ops) return; + data = dev_iommu_priv_get(dev); + larbid = mt2701_m4u_to_larb(fwspec->ids[0]); + larbdev = data->larb_imu[larbid].dev; + device_link_remove(dev, larbdev); + iommu_fwspec_free(dev); } @@ -595,7 +631,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) plarbdev = of_find_device_by_node(larbnode); if (!plarbdev) { of_node_put(larbnode); - return -EPROBE_DEFER; + return -ENODEV; } data->larb_imu[i].dev = &plarbdev->dev; diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c index ba4759b3e2693..94230306e0eee 100644 --- a/drivers/irqchip/irq-nvic.c +++ b/drivers/irqchip/irq-nvic.c @@ -107,6 +107,7 @@ static int __init nvic_of_init(struct device_node *node, if (!nvic_irq_domain) { pr_warn("Failed to allocate irq domain\n"); + iounmap(nvic_base); return -ENOMEM; } @@ -116,6 +117,7 @@ static int __init nvic_of_init(struct device_node *node, if (ret) { pr_warn("Failed to allocate irq chips\n"); irq_domain_remove(nvic_irq_domain); + iounmap(nvic_base); return ret; } diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 173e6520e06ec..c0b457f26ec41 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -56,17 +56,18 @@ static u32 pdc_reg_read(int reg, u32 i) static void pdc_enable_intr(struct irq_data *d, bool on) { int pin_out = d->hwirq; + unsigned long flags; u32 index, mask; u32 enable; index = pin_out / 32; mask = pin_out % 32; - raw_spin_lock(&pdc_lock); + raw_spin_lock_irqsave(&pdc_lock, flags); enable = pdc_reg_read(IRQ_ENABLE_BANK, index); enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask); pdc_reg_write(IRQ_ENABLE_BANK, index, enable); - raw_spin_unlock(&pdc_lock); + raw_spin_unlock_irqrestore(&pdc_lock, flags); } static void qcom_pdc_gic_disable(struct irq_data *d) diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c index 544de2db64531..a0c252415c868 100644 --- a/drivers/mailbox/imx-mailbox.c +++ b/drivers/mailbox/imx-mailbox.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #define IMX_MU_CHANS 16 @@ -76,6 +77,7 @@ struct imx_mu_priv { const struct imx_mu_dcfg *dcfg; struct clk *clk; int irq; + bool suspend; u32 xcr[4]; @@ -334,6 +336,9 @@ static irqreturn_t imx_mu_isr(int irq, void *p) return IRQ_NONE; } + if (priv->suspend) + pm_system_wakeup(); + return IRQ_HANDLED; } @@ -702,6 +707,8 @@ static int __maybe_unused imx_mu_suspend_noirq(struct device *dev) priv->xcr[i] = imx_mu_read(priv, priv->dcfg->xCR[i]); } + priv->suspend = true; + return 0; } @@ -718,11 +725,13 @@ static int __maybe_unused imx_mu_resume_noirq(struct device *dev) * send failed, may lead to system freeze. This issue * is observed by testing freeze mode suspend. 
*/ - if (!imx_mu_read(priv, priv->dcfg->xCR[0]) && !priv->clk) { + if (!priv->clk && !imx_mu_read(priv, priv->dcfg->xCR[0])) { for (i = 0; i < IMX_MU_xCR_MAX; i++) imx_mu_write(priv, priv->xcr[i], priv->dcfg->xCR[i]); } + priv->suspend = false; + return 0; } diff --git a/drivers/mailbox/tegra-hsp.c b/drivers/mailbox/tegra-hsp.c index acd0675da681e..78f7265039c66 100644 --- a/drivers/mailbox/tegra-hsp.c +++ b/drivers/mailbox/tegra-hsp.c @@ -412,6 +412,11 @@ static int tegra_hsp_mailbox_flush(struct mbox_chan *chan, value = tegra_hsp_channel_readl(ch, HSP_SM_SHRD_MBOX); if ((value & HSP_SM_SHRD_MBOX_FULL) == 0) { mbox_chan_txdone(chan, 0); + + /* Wait until channel is empty */ + if (chan->active_req != NULL) + continue; + return 0; } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 88c573eeb5982..ad9f16689419d 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2060,9 +2060,11 @@ int bch_btree_check(struct cache_set *c) } } + /* + * Must wait for all threads to stop. + */ wait_event_interruptible(check_state->wait, - atomic_read(&check_state->started) == 0 || - test_bit(CACHE_SET_IO_DISABLE, &c->flags)); + atomic_read(&check_state->started) == 0); for (i = 0; i < check_state->total_threads; i++) { if (check_state->infos[i].result) { diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index c7560f66dca88..68d3dd6b4f119 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -998,9 +998,11 @@ void bch_sectors_dirty_init(struct bcache_device *d) } } + /* + * Must wait for all threads to stop. + */ wait_event_interruptible(state->wait, - atomic_read(&state->started) == 0 || - test_bit(CACHE_SET_IO_DISABLE, &c->flags)); + atomic_read(&state->started) == 0); out: kfree(state); diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index b855fef4f38a6..adb9604e85ac4 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -65,6 +65,8 @@ struct mapped_device { struct gendisk *disk; struct dax_device *dax_dev; + unsigned long __percpu *pending_io; + /* * A list of ios that arrived while we were suspended. */ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d4ae31558826a..f51aea71cb036 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2590,7 +2590,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string static int get_key_size(char **key_string) { - return (*key_string[0] == ':') ? -EINVAL : strlen(*key_string) >> 1; + return (*key_string[0] == ':') ? 
-EINVAL : (int)(strlen(*key_string) >> 1); } #endif /* CONFIG_KEYS */ diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index eb4b5e52bd6ff..9399006dbc546 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2473,9 +2473,11 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, dm_integrity_io_error(ic, "invalid sector in journal", -EIO); sec &= ~(sector_t)(ic->sectors_per_block - 1); } + if (unlikely(sec >= ic->provided_data_sectors)) { + journal_entry_set_unused(je); + continue; + } } - if (unlikely(sec >= ic->provided_data_sectors)) - continue; get_area_and_offset(ic, sec, &area, &offset); restore_last_bytes(ic, access_journal_data(ic, i, j), je); for (k = j + 1; k < ic->journal_section_entries; k++) { diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 35d368c418d03..0e039a8c0bf2e 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -195,6 +195,7 @@ void dm_stats_init(struct dm_stats *stats) mutex_init(&stats->mutex); INIT_LIST_HEAD(&stats->list); + stats->precise_timestamps = false; stats->last = alloc_percpu(struct dm_stats_last_position); for_each_possible_cpu(cpu) { last = per_cpu_ptr(stats->last, cpu); @@ -231,6 +232,22 @@ void dm_stats_cleanup(struct dm_stats *stats) mutex_destroy(&stats->mutex); } +static void dm_stats_recalc_precise_timestamps(struct dm_stats *stats) +{ + struct list_head *l; + struct dm_stat *tmp_s; + bool precise_timestamps = false; + + list_for_each(l, &stats->list) { + tmp_s = container_of(l, struct dm_stat, list_entry); + if (tmp_s->stat_flags & STAT_PRECISE_TIMESTAMPS) { + precise_timestamps = true; + break; + } + } + stats->precise_timestamps = precise_timestamps; +} + static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, sector_t step, unsigned stat_flags, unsigned n_histogram_entries, @@ -376,6 +393,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, } ret_id = s->id; list_add_tail_rcu(&s->list_entry, l); + + dm_stats_recalc_precise_timestamps(stats); + mutex_unlock(&stats->mutex); resume_callback(md); @@ -418,6 +438,9 @@ static int dm_stats_delete(struct dm_stats *stats, int id) } list_del_rcu(&s->list_entry); + + dm_stats_recalc_precise_timestamps(stats); + mutex_unlock(&stats->mutex); /* @@ -621,13 +644,14 @@ static void __dm_stat_bio(struct dm_stat *s, int bi_rw, void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, sector_t bi_sector, unsigned bi_sectors, bool end, - unsigned long duration_jiffies, + unsigned long start_time, struct dm_stats_aux *stats_aux) { struct dm_stat *s; sector_t end_sector; struct dm_stats_last_position *last; bool got_precise_time; + unsigned long duration_jiffies = 0; if (unlikely(!bi_sectors)) return; @@ -647,16 +671,16 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, )); WRITE_ONCE(last->last_sector, end_sector); WRITE_ONCE(last->last_rw, bi_rw); - } + } else + duration_jiffies = jiffies - start_time; rcu_read_lock(); got_precise_time = false; list_for_each_entry_rcu(s, &stats->list, list_entry) { if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) { - if (!end) - stats_aux->duration_ns = ktime_to_ns(ktime_get()); - else + /* start (!end) duration_ns is set by DM core's alloc_io() */ + if (end) stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns; got_precise_time = true; } diff --git a/drivers/md/dm-stats.h b/drivers/md/dm-stats.h index 2ddfae678f320..09c81a1ec057d 100644 --- 
a/drivers/md/dm-stats.h +++ b/drivers/md/dm-stats.h @@ -13,8 +13,7 @@ struct dm_stats { struct mutex mutex; struct list_head list; /* list of struct dm_stat */ struct dm_stats_last_position __percpu *last; - sector_t last_sector; - unsigned last_rw; + bool precise_timestamps; }; struct dm_stats_aux { @@ -32,7 +31,7 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv, void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, sector_t bi_sector, unsigned bi_sectors, bool end, - unsigned long duration_jiffies, + unsigned long start_time, struct dm_stats_aux *aux); static inline bool dm_stats_used(struct dm_stats *st) @@ -40,4 +39,10 @@ static inline bool dm_stats_used(struct dm_stats *st) return !list_empty(&st->list); } +static inline void dm_stats_record_start(struct dm_stats *stats, struct dm_stats_aux *aux) +{ + if (unlikely(stats->precise_timestamps)) + aux->duration_ns = ktime_to_ns(ktime_get()); +} + #endif diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 997ace47bbd54..394778d8bf549 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -484,33 +484,48 @@ u64 dm_start_time_ns_from_clone(struct bio *bio) } EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone); -static void start_io_acct(struct dm_io *io) +static bool bio_is_flush_with_data(struct bio *bio) { - struct mapped_device *md = io->md; - struct bio *bio = io->orig_bio; - - bio_start_io_acct_time(bio, io->start_time); - if (unlikely(dm_stats_used(&md->stats))) - dm_stats_account_io(&md->stats, bio_data_dir(bio), - bio->bi_iter.bi_sector, bio_sectors(bio), - false, 0, &io->stats_aux); + return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size); } -static void end_io_acct(struct mapped_device *md, struct bio *bio, - unsigned long start_time, struct dm_stats_aux *stats_aux) +static void dm_io_acct(bool end, struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) { - unsigned long duration = jiffies - start_time; + bool is_flush_with_data; + unsigned int bi_size; - bio_end_io_acct(bio, start_time); + /* If REQ_PREFLUSH set save any payload but do not account it */ + is_flush_with_data = bio_is_flush_with_data(bio); + if (is_flush_with_data) { + bi_size = bio->bi_iter.bi_size; + bio->bi_iter.bi_size = 0; + } + + if (!end) + bio_start_io_acct_time(bio, start_time); + else + bio_end_io_acct(bio, start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), - true, duration, stats_aux); + end, start_time, stats_aux); + + /* Restore bio's payload so it does get accounted upon requeue */ + if (is_flush_with_data) + bio->bi_iter.bi_size = bi_size; +} + +static void start_io_acct(struct dm_io *io) +{ + dm_io_acct(false, io->md, io->orig_bio, io->start_time, &io->stats_aux); +} - /* nudge anyone waiting on suspend queue */ - if (unlikely(wq_has_sleeper(&md->wait))) - wake_up(&md->wait); +static void end_io_acct(struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) +{ + dm_io_acct(true, md, bio, start_time, stats_aux); } static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) @@ -531,12 +546,15 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) io->magic = DM_IO_MAGIC; io->status = 0; atomic_set(&io->io_count, 1); + this_cpu_inc(*md->pending_io); io->orig_bio = bio; io->md = md; spin_lock_init(&io->endio_lock); io->start_time = jiffies; + dm_stats_record_start(&md->stats, 
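/*
 * Editor's note on the dm.c hunks above and below: pending_io is a percpu
 * counter, so each submission does this_cpu_inc(*md->pending_io) in
 * alloc_io() and each completion issues smp_wmb() before
 * this_cpu_dec(*md->pending_io); the waiter sums
 * *per_cpu_ptr(md->pending_io, cpu) over all possible CPUs and pairs
 * with smp_rmb() after the wait loop, so a waiter that observes the
 * count reach zero also observes the completed I/O's earlier stores.
 */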
&io->stats_aux); + return io; } @@ -826,11 +844,17 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) stats_aux = io->stats_aux; free_io(md, io); end_io_acct(md, bio, start_time, &stats_aux); + smp_wmb(); + this_cpu_dec(*md->pending_io); + + /* nudge anyone waiting on suspend queue */ + if (unlikely(wq_has_sleeper(&md->wait))) + wake_up(&md->wait); if (io_error == BLK_STS_DM_REQUEUE) return; - if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) { + if (bio_is_flush_with_data(bio)) { /* * Preflush done for flush with data, reissue * without REQ_PREFLUSH. @@ -1607,6 +1631,7 @@ static void cleanup_mapped_device(struct mapped_device *md) md->dax_dev = NULL; } + dm_cleanup_zoned_dev(md); if (md->disk) { spin_lock(&_minor_lock); md->disk->private_data = NULL; @@ -1619,6 +1644,11 @@ static void cleanup_mapped_device(struct mapped_device *md) blk_cleanup_disk(md->disk); } + if (md->pending_io) { + free_percpu(md->pending_io); + md->pending_io = NULL; + } + cleanup_srcu_struct(&md->io_barrier); mutex_destroy(&md->suspend_lock); @@ -1627,7 +1657,6 @@ static void cleanup_mapped_device(struct mapped_device *md) mutex_destroy(&md->swap_bios_lock); dm_mq_cleanup_mapped_device(md); - dm_cleanup_zoned_dev(md); } /* @@ -1721,6 +1750,10 @@ static struct mapped_device *alloc_dev(int minor) if (!md->wq) goto bad; + md->pending_io = alloc_percpu(unsigned long); + if (!md->pending_io) + goto bad; + dm_stats_init(&md->stats); /* Populate the mapping, nobody knows we exist yet */ @@ -2128,16 +2161,13 @@ void dm_put(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_put); -static bool md_in_flight_bios(struct mapped_device *md) +static bool dm_in_flight_bios(struct mapped_device *md) { int cpu; - struct block_device *part = dm_disk(md)->part0; - long sum = 0; + unsigned long sum = 0; - for_each_possible_cpu(cpu) { - sum += part_stat_local_read_cpu(part, in_flight[0], cpu); - sum += part_stat_local_read_cpu(part, in_flight[1], cpu); - } + for_each_possible_cpu(cpu) + sum += *per_cpu_ptr(md->pending_io, cpu); return sum != 0; } @@ -2150,7 +2180,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta while (true) { prepare_to_wait(&md->wait, &wait, task_state); - if (!md_in_flight_bios(md)) + if (!dm_in_flight_bios(md)) break; if (signal_pending_state(task_state, current)) { @@ -2162,6 +2192,8 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta } finish_wait(&md->wait, &wait); + smp_rmb(); + return r; } diff --git a/drivers/media/i2c/adv7511-v4l2.c b/drivers/media/i2c/adv7511-v4l2.c index 8e13cae40ec5b..db7f41a80770d 100644 --- a/drivers/media/i2c/adv7511-v4l2.c +++ b/drivers/media/i2c/adv7511-v4l2.c @@ -522,7 +522,7 @@ static void log_infoframe(struct v4l2_subdev *sd, const struct adv7511_cfg_read_ buffer[3] = 0; buffer[3] = hdmi_infoframe_checksum(buffer, len + 4); - if (hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)) < 0) { + if (hdmi_infoframe_unpack(&frame, buffer, len + 4) < 0) { v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, cri->desc); return; } diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index a2fa408d2d9f5..bb0c8fc6d3832 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -2484,7 +2484,7 @@ static int adv76xx_read_infoframe(struct v4l2_subdev *sd, int index, buffer[i + 3] = infoframe_read(sd, adv76xx_cri[index].payload_addr + i); - if (hdmi_infoframe_unpack(frame, buffer, sizeof(buffer)) < 0) { + if (hdmi_infoframe_unpack(frame, buffer, len + 3) < 0) { 
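/*
 * Editor's note covering the adv7511/adv7604/adv7842 hunks: the size
 * argument of hdmi_infoframe_unpack() is validated against the frame's
 * declared payload length, so it must be the byte count actually read
 * from the bridge (3-byte header plus payload, plus the inserted
 * checksum byte in the adv7511 case), never sizeof(buffer); passing the
 * buffer capacity lets a short or corrupt read still parse as a
 * complete infoframe from stale buffer bytes.
 */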
v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, adv76xx_cri[index].desc); return -ENOENT; diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c index 9d6eed0f82819..22caa070273b4 100644 --- a/drivers/media/i2c/adv7842.c +++ b/drivers/media/i2c/adv7842.c @@ -2583,7 +2583,7 @@ static void log_infoframe(struct v4l2_subdev *sd, const struct adv7842_cfg_read_ for (i = 0; i < len; i++) buffer[i + 3] = infoframe_read(sd, cri->payload_addr + i); - if (hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)) < 0) { + if (hdmi_infoframe_unpack(&frame, buffer, len + 3) < 0) { v4l2_err(sd, "%s: unpack of %s infoframe failed\n", __func__, cri->desc); return; } diff --git a/drivers/media/i2c/ov2740.c b/drivers/media/i2c/ov2740.c index bab720c7c1de1..d5f0eabf20c6a 100644 --- a/drivers/media/i2c/ov2740.c +++ b/drivers/media/i2c/ov2740.c @@ -1162,6 +1162,7 @@ static int ov2740_probe(struct i2c_client *client) if (!ov2740) return -ENOMEM; + v4l2_i2c_subdev_init(&ov2740->sd, client, &ov2740_subdev_ops); full_power = acpi_dev_state_d0(&client->dev); if (full_power) { ret = ov2740_identify_module(ov2740); @@ -1171,13 +1172,6 @@ static int ov2740_probe(struct i2c_client *client) } } - v4l2_i2c_subdev_init(&ov2740->sd, client, &ov2740_subdev_ops); - ret = ov2740_identify_module(ov2740); - if (ret) { - dev_err(&client->dev, "failed to find sensor: %d", ret); - return ret; - } - mutex_init(&ov2740->mutex); ov2740->cur_mode = &supported_modes[0]; ret = ov2740_init_controls(ov2740); diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c index ddbd71394db33..db5a19babe67d 100644 --- a/drivers/media/i2c/ov5640.c +++ b/drivers/media/i2c/ov5640.c @@ -2293,7 +2293,6 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd, struct ov5640_dev *sensor = to_ov5640_dev(sd); const struct ov5640_mode_info *new_mode; struct v4l2_mbus_framefmt *mbus_fmt = &format->format; - struct v4l2_mbus_framefmt *fmt; int ret; if (format->pad != 0) @@ -2311,12 +2310,10 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd, if (ret) goto out; - if (format->which == V4L2_SUBDEV_FORMAT_TRY) - fmt = v4l2_subdev_get_try_format(sd, sd_state, 0); - else - fmt = &sensor->fmt; - - *fmt = *mbus_fmt; + if (format->which == V4L2_SUBDEV_FORMAT_TRY) { + *v4l2_subdev_get_try_format(sd, sd_state, 0) = *mbus_fmt; + goto out; + } if (new_mode != sensor->current_mode) { sensor->current_mode = new_mode; @@ -2325,6 +2322,9 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd, if (mbus_fmt->code != sensor->fmt.code) sensor->pending_fmt_change = true; + /* update format even if code is unchanged, resolution might change */ + sensor->fmt = *mbus_fmt; + __v4l2_ctrl_s_ctrl_int64(sensor->ctrls.pixel_rate, ov5640_calc_pixel_rate(sensor)); out: diff --git a/drivers/media/i2c/ov5648.c b/drivers/media/i2c/ov5648.c index 947d437ed0efe..ef8b52dc9401d 100644 --- a/drivers/media/i2c/ov5648.c +++ b/drivers/media/i2c/ov5648.c @@ -639,7 +639,7 @@ struct ov5648_ctrls { struct v4l2_ctrl *pixel_rate; struct v4l2_ctrl_handler handler; -} __packed; +}; struct ov5648_sensor { struct device *dev; @@ -1778,8 +1778,14 @@ static int ov5648_state_configure(struct ov5648_sensor *sensor, static int ov5648_state_init(struct ov5648_sensor *sensor) { - return ov5648_state_configure(sensor, &ov5648_modes[0], - ov5648_mbus_codes[0]); + int ret; + + mutex_lock(&sensor->mutex); + ret = ov5648_state_configure(sensor, &ov5648_modes[0], + ov5648_mbus_codes[0]); + mutex_unlock(&sensor->mutex); + + return ret; } /* Sensor Base */ diff --git a/drivers/media/i2c/ov6650.c 
b/drivers/media/i2c/ov6650.c index f67412150b16b..eb59dc8bb5929 100644 --- a/drivers/media/i2c/ov6650.c +++ b/drivers/media/i2c/ov6650.c @@ -472,9 +472,16 @@ static int ov6650_get_selection(struct v4l2_subdev *sd, { struct i2c_client *client = v4l2_get_subdevdata(sd); struct ov6650 *priv = to_ov6650(client); + struct v4l2_rect *rect; - if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE) - return -EINVAL; + if (sel->which == V4L2_SUBDEV_FORMAT_TRY) { + /* pre-select try crop rectangle */ + rect = &sd_state->pads->try_crop; + + } else { + /* pre-select active crop rectangle */ + rect = &priv->rect; + } switch (sel->target) { case V4L2_SEL_TGT_CROP_BOUNDS: @@ -483,14 +490,33 @@ static int ov6650_get_selection(struct v4l2_subdev *sd, sel->r.width = W_CIF; sel->r.height = H_CIF; return 0; + case V4L2_SEL_TGT_CROP: - sel->r = priv->rect; + /* use selected crop rectangle */ + sel->r = *rect; return 0; + default: return -EINVAL; } } +static bool is_unscaled_ok(int width, int height, struct v4l2_rect *rect) +{ + return width > rect->width >> 1 || height > rect->height >> 1; +} + +static void ov6650_bind_align_crop_rectangle(struct v4l2_rect *rect) +{ + v4l_bound_align_image(&rect->width, 2, W_CIF, 1, + &rect->height, 2, H_CIF, 1, 0); + v4l_bound_align_image(&rect->left, DEF_HSTRT << 1, + (DEF_HSTRT << 1) + W_CIF - (__s32)rect->width, 1, + &rect->top, DEF_VSTRT << 1, + (DEF_VSTRT << 1) + H_CIF - (__s32)rect->height, + 1, 0); +} + static int ov6650_set_selection(struct v4l2_subdev *sd, struct v4l2_subdev_state *sd_state, struct v4l2_subdev_selection *sel) @@ -499,18 +525,30 @@ static int ov6650_set_selection(struct v4l2_subdev *sd, struct ov6650 *priv = to_ov6650(client); int ret; - if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE || - sel->target != V4L2_SEL_TGT_CROP) + if (sel->target != V4L2_SEL_TGT_CROP) return -EINVAL; - v4l_bound_align_image(&sel->r.width, 2, W_CIF, 1, - &sel->r.height, 2, H_CIF, 1, 0); - v4l_bound_align_image(&sel->r.left, DEF_HSTRT << 1, - (DEF_HSTRT << 1) + W_CIF - (__s32)sel->r.width, 1, - &sel->r.top, DEF_VSTRT << 1, - (DEF_VSTRT << 1) + H_CIF - (__s32)sel->r.height, - 1, 0); + ov6650_bind_align_crop_rectangle(&sel->r); + + if (sel->which == V4L2_SUBDEV_FORMAT_TRY) { + struct v4l2_rect *crop = &sd_state->pads->try_crop; + struct v4l2_mbus_framefmt *mf = &sd_state->pads->try_fmt; + /* detect current pad config scaling factor */ + bool half_scale = !is_unscaled_ok(mf->width, mf->height, crop); + + /* store new crop rectangle */ + *crop = sel->r; + /* adjust frame size */ + mf->width = crop->width >> half_scale; + mf->height = crop->height >> half_scale; + + return 0; + } + + /* V4L2_SUBDEV_FORMAT_ACTIVE */ + + /* apply new crop rectangle */ ret = ov6650_reg_write(client, REG_HSTRT, sel->r.left >> 1); if (!ret) { priv->rect.width += priv->rect.left - sel->r.left; @@ -562,30 +600,13 @@ static int ov6650_get_fmt(struct v4l2_subdev *sd, return 0; } -static bool is_unscaled_ok(int width, int height, struct v4l2_rect *rect) -{ - return width > rect->width >> 1 || height > rect->height >> 1; -} - #define to_clkrc(div) ((div) - 1) /* set the format we will capture in */ -static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) +static int ov6650_s_fmt(struct v4l2_subdev *sd, u32 code, bool half_scale) { struct i2c_client *client = v4l2_get_subdevdata(sd); struct ov6650 *priv = to_ov6650(client); - bool half_scale = !is_unscaled_ok(mf->width, mf->height, &priv->rect); - struct v4l2_subdev_selection sel = { - .which = V4L2_SUBDEV_FORMAT_ACTIVE, - .target = V4L2_SEL_TGT_CROP, 
- .r.left = priv->rect.left + (priv->rect.width >> 1) - - (mf->width >> (1 - half_scale)), - .r.top = priv->rect.top + (priv->rect.height >> 1) - - (mf->height >> (1 - half_scale)), - .r.width = mf->width << half_scale, - .r.height = mf->height << half_scale, - }; - u32 code = mf->code; u8 coma_set = 0, coma_mask = 0, coml_set, coml_mask; int ret; @@ -653,9 +674,7 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) coma_mask |= COMA_QCIF; } - ret = ov6650_set_selection(sd, NULL, &sel); - if (!ret) - ret = ov6650_reg_rmw(client, REG_COMA, coma_set, coma_mask); + ret = ov6650_reg_rmw(client, REG_COMA, coma_set, coma_mask); if (!ret) { priv->half_scale = half_scale; @@ -674,14 +693,12 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf = &format->format; struct i2c_client *client = v4l2_get_subdevdata(sd); struct ov6650 *priv = to_ov6650(client); + struct v4l2_rect *crop; + bool half_scale; if (format->pad) return -EINVAL; - if (is_unscaled_ok(mf->width, mf->height, &priv->rect)) - v4l_bound_align_image(&mf->width, 2, W_CIF, 1, - &mf->height, 2, H_CIF, 1, 0); - switch (mf->code) { case MEDIA_BUS_FMT_Y10_1X10: mf->code = MEDIA_BUS_FMT_Y8_1X8; @@ -699,10 +716,17 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd, break; } + if (format->which == V4L2_SUBDEV_FORMAT_TRY) + crop = &sd_state->pads->try_crop; + else + crop = &priv->rect; + + half_scale = !is_unscaled_ok(mf->width, mf->height, crop); + if (format->which == V4L2_SUBDEV_FORMAT_TRY) { - /* store media bus format code and frame size in pad config */ - sd_state->pads->try_fmt.width = mf->width; - sd_state->pads->try_fmt.height = mf->height; + /* store new mbus frame format code and size in pad config */ + sd_state->pads->try_fmt.width = crop->width >> half_scale; + sd_state->pads->try_fmt.height = crop->height >> half_scale; sd_state->pads->try_fmt.code = mf->code; /* return default mbus frame format updated with pad config */ @@ -712,9 +736,11 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd, mf->code = sd_state->pads->try_fmt.code; } else { - /* apply new media bus format code and frame size */ - int ret = ov6650_s_fmt(sd, mf); + int ret = 0; + /* apply new media bus frame format and scaling if changed */ + if (mf->code != priv->code || half_scale != priv->half_scale) + ret = ov6650_s_fmt(sd, mf->code, half_scale); if (ret) return ret; @@ -890,9 +916,8 @@ static int ov6650_video_probe(struct v4l2_subdev *sd) if (!ret) ret = ov6650_prog_dflt(client, xclk->clkrc); if (!ret) { - struct v4l2_mbus_framefmt mf = ov6650_def_fmt; - - ret = ov6650_s_fmt(sd, &mf); + /* driver default frame format, no scaling */ + ret = ov6650_s_fmt(sd, ov6650_def_fmt.code, false); } if (!ret) ret = v4l2_ctrl_handler_setup(&priv->hdl); diff --git a/drivers/media/i2c/ov8865.c b/drivers/media/i2c/ov8865.c index d9d016cfa9ac0..e0dd0f4849a7a 100644 --- a/drivers/media/i2c/ov8865.c +++ b/drivers/media/i2c/ov8865.c @@ -457,8 +457,8 @@ #define OV8865_NATIVE_WIDTH 3296 #define OV8865_NATIVE_HEIGHT 2528 -#define OV8865_ACTIVE_START_TOP 32 -#define OV8865_ACTIVE_START_LEFT 80 +#define OV8865_ACTIVE_START_LEFT 16 +#define OV8865_ACTIVE_START_TOP 40 #define OV8865_ACTIVE_WIDTH 3264 #define OV8865_ACTIVE_HEIGHT 2448 diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c index 8cc9bec43688e..5ca3d0cc653a8 100644 --- a/drivers/media/pci/bt8xx/bttv-driver.c +++ b/drivers/media/pci/bt8xx/bttv-driver.c @@ -3890,7 +3890,7 @@ static int bttv_register_video(struct bttv *btv) /* video */ 
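The ov6650 rework above follows the usual subdev pattern: the selection and format handlers first resolve which state they operate on, the per-filehandle TRY state in the pad config or the ACTIVE state kept in the driver, then share the remaining code path. A minimal sketch of that pattern, assuming a hypothetical my_sensor driver (to_my_sensor() and priv->rect are illustrative names, not the actual ov6650 symbols):

static int my_get_selection(struct v4l2_subdev *sd,
			    struct v4l2_subdev_state *sd_state,
			    struct v4l2_subdev_selection *sel)
{
	struct my_sensor *priv = to_my_sensor(sd);
	struct v4l2_rect *rect;

	/* TRY state lives in the pad config, ACTIVE state in the driver */
	if (sel->which == V4L2_SUBDEV_FORMAT_TRY)
		rect = &sd_state->pads->try_crop;
	else
		rect = &priv->rect;

	switch (sel->target) {
	case V4L2_SEL_TGT_CROP:
		sel->r = *rect;
		return 0;
	default:
		return -EINVAL;
	}
}

With both rectangles behind one pointer, the TRY path gets crop support essentially for free and cannot drift from the ACTIVE implementation.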
vdev_init(btv, &btv->video_dev, &bttv_video_template, "video"); - btv->video_dev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_TUNER | + btv->video_dev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE | V4L2_CAP_STREAMING; if (btv->tuner_type != TUNER_ABSENT) btv->video_dev.device_caps |= V4L2_CAP_TUNER; @@ -3911,7 +3911,7 @@ static int bttv_register_video(struct bttv *btv) /* vbi */ vdev_init(btv, &btv->vbi_dev, &bttv_video_template, "vbi"); btv->vbi_dev.device_caps = V4L2_CAP_VBI_CAPTURE | V4L2_CAP_READWRITE | - V4L2_CAP_STREAMING | V4L2_CAP_TUNER; + V4L2_CAP_STREAMING; if (btv->tuner_type != TUNER_ABSENT) btv->vbi_dev.device_caps |= V4L2_CAP_TUNER; diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c index 680e1e3fe89b7..2c1d5137ac470 100644 --- a/drivers/media/pci/cx88/cx88-mpeg.c +++ b/drivers/media/pci/cx88/cx88-mpeg.c @@ -162,6 +162,9 @@ int cx8802_start_dma(struct cx8802_dev *dev, cx_write(MO_TS_GPCNTRL, GP_COUNT_CONTROL_RESET); q->count = 0; + /* clear interrupt status register */ + cx_write(MO_TS_INTSTAT, 0x1f1111); + /* enable irqs */ dprintk(1, "setting the interrupt mask\n"); cx_set(MO_PCI_INTMSK, core->pci_irqmask | PCI_INT_TSINT); diff --git a/drivers/media/pci/ivtv/ivtv-driver.h b/drivers/media/pci/ivtv/ivtv-driver.h index 4cf92dee65271..ce3a7ca51736e 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.h +++ b/drivers/media/pci/ivtv/ivtv-driver.h @@ -330,7 +330,6 @@ struct ivtv_stream { struct ivtv *itv; /* for ease of use */ const char *name; /* name of the stream */ int type; /* stream type */ - u32 caps; /* V4L2 capabilities */ struct v4l2_fh *fh; /* pointer to the streaming filehandle */ spinlock_t qlock; /* locks access to the queues */ diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c index 0cdf6b3210c2f..fee460e2ca863 100644 --- a/drivers/media/pci/ivtv/ivtv-ioctl.c +++ b/drivers/media/pci/ivtv/ivtv-ioctl.c @@ -438,7 +438,7 @@ static int ivtv_g_fmt_vid_out_overlay(struct file *file, void *fh, struct v4l2_f struct ivtv_stream *s = &itv->streams[fh2id(fh)->type]; struct v4l2_window *winfmt = &fmt->fmt.win; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -EINVAL; if (!itv->osd_video_pbase) return -EINVAL; @@ -549,7 +549,7 @@ static int ivtv_try_fmt_vid_out_overlay(struct file *file, void *fh, struct v4l2 u32 chromakey = fmt->fmt.win.chromakey; u8 global_alpha = fmt->fmt.win.global_alpha; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -EINVAL; if (!itv->osd_video_pbase) return -EINVAL; @@ -1383,7 +1383,7 @@ static int ivtv_g_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *fb) 0, }; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -ENOTTY; if (!itv->osd_video_pbase) return -ENOTTY; @@ -1450,7 +1450,7 @@ static int ivtv_s_fbuf(struct file *file, void *fh, const struct v4l2_framebuffe struct ivtv_stream *s = &itv->streams[fh2id(fh)->type]; struct yuv_playback_info *yi = &itv->yuv_info; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if (!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -ENOTTY; if (!itv->osd_video_pbase) return -ENOTTY; @@ -1470,7 +1470,7 @@ static int ivtv_overlay(struct file *file, void *fh, unsigned int on) struct ivtv *itv = id->itv; struct ivtv_stream *s = &itv->streams[fh2id(fh)->type]; - if (!(s->caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) + if 
(!(s->vdev.device_caps & V4L2_CAP_VIDEO_OUTPUT_OVERLAY)) return -ENOTTY; if (!itv->osd_video_pbase) return -ENOTTY; diff --git a/drivers/media/pci/ivtv/ivtv-streams.c b/drivers/media/pci/ivtv/ivtv-streams.c index 6e455948cc77a..13d7d55e65949 100644 --- a/drivers/media/pci/ivtv/ivtv-streams.c +++ b/drivers/media/pci/ivtv/ivtv-streams.c @@ -176,7 +176,7 @@ static void ivtv_stream_init(struct ivtv *itv, int type) s->itv = itv; s->type = type; s->name = ivtv_stream_info[type].name; - s->caps = ivtv_stream_info[type].v4l2_caps; + s->vdev.device_caps = ivtv_stream_info[type].v4l2_caps; if (ivtv_stream_info[type].pio) s->dma = DMA_NONE; @@ -299,12 +299,9 @@ static int ivtv_reg_dev(struct ivtv *itv, int type) if (s_mpg->vdev.v4l2_dev) num = s_mpg->vdev.num + ivtv_stream_info[type].num_offset; } - s->vdev.device_caps = s->caps; - if (itv->osd_video_pbase) { - itv->streams[IVTV_DEC_STREAM_TYPE_YUV].vdev.device_caps |= - V4L2_CAP_VIDEO_OUTPUT_OVERLAY; - itv->streams[IVTV_DEC_STREAM_TYPE_MPG].vdev.device_caps |= - V4L2_CAP_VIDEO_OUTPUT_OVERLAY; + if (itv->osd_video_pbase && (type == IVTV_DEC_STREAM_TYPE_YUV || + type == IVTV_DEC_STREAM_TYPE_MPG)) { + s->vdev.device_caps |= V4L2_CAP_VIDEO_OUTPUT_OVERLAY; itv->v4l2_cap |= V4L2_CAP_VIDEO_OUTPUT_OVERLAY; } video_set_drvdata(&s->vdev, s); diff --git a/drivers/media/pci/saa7134/saa7134-alsa.c b/drivers/media/pci/saa7134/saa7134-alsa.c index fb24d2ed3621b..d3cde05a6ebab 100644 --- a/drivers/media/pci/saa7134/saa7134-alsa.c +++ b/drivers/media/pci/saa7134/saa7134-alsa.c @@ -1214,7 +1214,7 @@ static int alsa_device_exit(struct saa7134_dev *dev) static int saa7134_alsa_init(void) { - struct saa7134_dev *dev = NULL; + struct saa7134_dev *dev; saa7134_dmasound_init = alsa_device_init; saa7134_dmasound_exit = alsa_device_exit; @@ -1229,7 +1229,7 @@ static int saa7134_alsa_init(void) alsa_device_init(dev); } - if (dev == NULL) + if (list_empty(&saa7134_devlist)) pr_info("saa7134 ALSA: no saa7134 cards found\n"); return 0; diff --git a/drivers/media/platform/aspeed-video.c b/drivers/media/platform/aspeed-video.c index 7a24daf7165a4..bdeecde0d9978 100644 --- a/drivers/media/platform/aspeed-video.c +++ b/drivers/media/platform/aspeed-video.c @@ -153,7 +153,7 @@ #define VE_SRC_TB_EDGE_DET_BOT GENMASK(28, VE_SRC_TB_EDGE_DET_BOT_SHF) #define VE_MODE_DETECT_STATUS 0x098 -#define VE_MODE_DETECT_H_PIXELS GENMASK(11, 0) +#define VE_MODE_DETECT_H_PERIOD GENMASK(11, 0) #define VE_MODE_DETECT_V_LINES_SHF 16 #define VE_MODE_DETECT_V_LINES GENMASK(27, VE_MODE_DETECT_V_LINES_SHF) #define VE_MODE_DETECT_STATUS_VSYNC BIT(28) @@ -164,6 +164,8 @@ #define VE_SYNC_STATUS_VSYNC_SHF 16 #define VE_SYNC_STATUS_VSYNC GENMASK(27, VE_SYNC_STATUS_VSYNC_SHF) +#define VE_H_TOTAL_PIXELS 0x0A0 + #define VE_INTERRUPT_CTRL 0x304 #define VE_INTERRUPT_STATUS 0x308 #define VE_INTERRUPT_MODE_DETECT_WD BIT(0) @@ -802,6 +804,7 @@ static void aspeed_video_get_resolution(struct aspeed_video *video) u32 src_lr_edge; u32 src_tb_edge; u32 sync; + u32 htotal; struct v4l2_bt_timings *det = &video->detected_timings; det->width = MIN_WIDTH; @@ -847,6 +850,7 @@ static void aspeed_video_get_resolution(struct aspeed_video *video) src_tb_edge = aspeed_video_read(video, VE_SRC_TB_EDGE_DET); mds = aspeed_video_read(video, VE_MODE_DETECT_STATUS); sync = aspeed_video_read(video, VE_SYNC_STATUS); + htotal = aspeed_video_read(video, VE_H_TOTAL_PIXELS); video->frame_bottom = (src_tb_edge & VE_SRC_TB_EDGE_DET_BOT) >> VE_SRC_TB_EDGE_DET_BOT_SHF; @@ -863,8 +867,7 @@ static void aspeed_video_get_resolution(struct aspeed_video 
*video) VE_SRC_LR_EDGE_DET_RT_SHF; video->frame_left = src_lr_edge & VE_SRC_LR_EDGE_DET_LEFT; det->hfrontporch = video->frame_left; - det->hbackporch = (mds & VE_MODE_DETECT_H_PIXELS) - - video->frame_right; + det->hbackporch = htotal - video->frame_right; det->hsync = sync & VE_SYNC_STATUS_HSYNC; if (video->frame_left > video->frame_right) continue; diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c index 660cd0ab6749a..24807782c9e50 100644 --- a/drivers/media/platform/atmel/atmel-isc-base.c +++ b/drivers/media/platform/atmel/atmel-isc-base.c @@ -1369,14 +1369,12 @@ static int isc_enum_framesizes(struct file *file, void *fh, struct v4l2_frmsizeenum *fsize) { struct isc_device *isc = video_drvdata(file); - struct v4l2_subdev_frame_size_enum fse = { - .code = isc->config.sd_format->mbus_code, - .index = fsize->index, - .which = V4L2_SUBDEV_FORMAT_ACTIVE, - }; int ret = -EINVAL; int i; + if (fsize->index) + return -EINVAL; + for (i = 0; i < isc->num_user_formats; i++) if (isc->user_formats[i]->fourcc == fsize->pixel_format) ret = 0; @@ -1388,14 +1386,14 @@ static int isc_enum_framesizes(struct file *file, void *fh, if (ret) return ret; - ret = v4l2_subdev_call(isc->current_subdev->sd, pad, enum_frame_size, - NULL, &fse); - if (ret) - return ret; + fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; - fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE; - fsize->discrete.width = fse.max_width; - fsize->discrete.height = fse.max_height; + fsize->stepwise.min_width = 16; + fsize->stepwise.max_width = isc->max_width; + fsize->stepwise.min_height = 16; + fsize->stepwise.max_height = isc->max_height; + fsize->stepwise.step_width = 1; + fsize->stepwise.step_height = 1; return 0; } diff --git a/drivers/media/platform/atmel/atmel-sama7g5-isc.c b/drivers/media/platform/atmel/atmel-sama7g5-isc.c index 5d1c76f680f37..2b1082295c130 100644 --- a/drivers/media/platform/atmel/atmel-sama7g5-isc.c +++ b/drivers/media/platform/atmel/atmel-sama7g5-isc.c @@ -556,7 +556,6 @@ static int microchip_xisc_remove(struct platform_device *pdev) v4l2_device_unregister(&isc->v4l2_dev); - clk_disable_unprepare(isc->ispck); clk_disable_unprepare(isc->hclock); isc_clk_cleanup(isc); @@ -568,7 +567,6 @@ static int __maybe_unused xisc_runtime_suspend(struct device *dev) { struct isc_device *isc = dev_get_drvdata(dev); - clk_disable_unprepare(isc->ispck); clk_disable_unprepare(isc->hclock); return 0; @@ -583,10 +581,6 @@ static int __maybe_unused xisc_runtime_resume(struct device *dev) if (ret) return ret; - ret = clk_prepare_enable(isc->ispck); - if (ret) - clk_disable_unprepare(isc->hclock); - return ret; } diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 3cd47ba26357e..a57822b050706 100644 --- a/drivers/media/platform/coda/coda-common.c +++ b/drivers/media/platform/coda/coda-common.c @@ -409,6 +409,7 @@ static struct vdoa_data *coda_get_vdoa_data(void) if (!vdoa_data) vdoa_data = ERR_PTR(-EPROBE_DEFER); + put_device(&vdoa_pdev->dev); out: of_node_put(vdoa_node); diff --git a/drivers/media/platform/davinci/vpif.c b/drivers/media/platform/davinci/vpif.c index 5a89d885d0e3b..4a260f4ed236b 100644 --- a/drivers/media/platform/davinci/vpif.c +++ b/drivers/media/platform/davinci/vpif.c @@ -41,6 +41,11 @@ MODULE_ALIAS("platform:" VPIF_DRIVER_NAME); #define VPIF_CH2_MAX_MODES 15 #define VPIF_CH3_MAX_MODES 2 +struct vpif_data { + struct platform_device *capture; + struct platform_device *display; +}; + DEFINE_SPINLOCK(vpif_lock); 
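The coda change above and the vpif rework below come down to the same driver-model lifetime rule: a device obtained from a lookup must be released with put_device(), and a hand-rolled platform_device needs a release callback so that platform_device_put(), rather than a bare kfree(), disposes of it when registration fails. A condensed sketch of that rule; the demo_* names are illustrative only, not taken from the patch:

#include <linux/platform_device.h>
#include <linux/slab.h>

static void demo_pdev_release(struct device *dev)
{
	/* the device core calls this when the last reference is dropped */
	kfree(to_platform_device(dev));
}

static int demo_register_child(struct device *parent)
{
	struct platform_device *pdev;
	int ret;

	pdev = kzalloc(sizeof(*pdev), GFP_KERNEL);
	if (!pdev)
		return -ENOMEM;

	pdev->name = "demo-child";
	pdev->id = -1;
	pdev->dev.parent = parent;
	pdev->dev.release = demo_pdev_release;

	ret = platform_device_register(pdev);
	if (ret)
		/* put, don't kfree: the release callback frees pdev */
		platform_device_put(pdev);

	return ret;
}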
EXPORT_SYMBOL_GPL(vpif_lock); @@ -423,16 +428,31 @@ int vpif_channel_getfid(u8 channel_id) } EXPORT_SYMBOL(vpif_channel_getfid); +static void vpif_pdev_release(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + kfree(pdev); +} + static int vpif_probe(struct platform_device *pdev) { static struct resource *res_irq; struct platform_device *pdev_capture, *pdev_display; struct device_node *endpoint = NULL; + struct vpif_data *data; + int ret; vpif_base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(vpif_base)) return PTR_ERR(vpif_base); + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + platform_set_drvdata(pdev, data); + pm_runtime_enable(&pdev->dev); pm_runtime_get(&pdev->dev); @@ -456,46 +476,79 @@ static int vpif_probe(struct platform_device *pdev) res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!res_irq) { dev_warn(&pdev->dev, "Missing IRQ resource.\n"); - pm_runtime_put(&pdev->dev); - return -EINVAL; + ret = -EINVAL; + goto err_put_rpm; } - pdev_capture = devm_kzalloc(&pdev->dev, sizeof(*pdev_capture), - GFP_KERNEL); - if (pdev_capture) { - pdev_capture->name = "vpif_capture"; - pdev_capture->id = -1; - pdev_capture->resource = res_irq; - pdev_capture->num_resources = 1; - pdev_capture->dev.dma_mask = pdev->dev.dma_mask; - pdev_capture->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; - pdev_capture->dev.parent = &pdev->dev; - platform_device_register(pdev_capture); - } else { - dev_warn(&pdev->dev, "Unable to allocate memory for pdev_capture.\n"); + pdev_capture = kzalloc(sizeof(*pdev_capture), GFP_KERNEL); + if (!pdev_capture) { + ret = -ENOMEM; + goto err_put_rpm; } - pdev_display = devm_kzalloc(&pdev->dev, sizeof(*pdev_display), - GFP_KERNEL); - if (pdev_display) { - pdev_display->name = "vpif_display"; - pdev_display->id = -1; - pdev_display->resource = res_irq; - pdev_display->num_resources = 1; - pdev_display->dev.dma_mask = pdev->dev.dma_mask; - pdev_display->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; - pdev_display->dev.parent = &pdev->dev; - platform_device_register(pdev_display); - } else { - dev_warn(&pdev->dev, "Unable to allocate memory for pdev_display.\n"); + pdev_capture->name = "vpif_capture"; + pdev_capture->id = -1; + pdev_capture->resource = res_irq; + pdev_capture->num_resources = 1; + pdev_capture->dev.dma_mask = pdev->dev.dma_mask; + pdev_capture->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; + pdev_capture->dev.parent = &pdev->dev; + pdev_capture->dev.release = vpif_pdev_release; + + ret = platform_device_register(pdev_capture); + if (ret) + goto err_put_pdev_capture; + + pdev_display = kzalloc(sizeof(*pdev_display), GFP_KERNEL); + if (!pdev_display) { + ret = -ENOMEM; + goto err_put_pdev_capture; } + pdev_display->name = "vpif_display"; + pdev_display->id = -1; + pdev_display->resource = res_irq; + pdev_display->num_resources = 1; + pdev_display->dev.dma_mask = pdev->dev.dma_mask; + pdev_display->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; + pdev_display->dev.parent = &pdev->dev; + pdev_display->dev.release = vpif_pdev_release; + + ret = platform_device_register(pdev_display); + if (ret) + goto err_put_pdev_display; + + data->capture = pdev_capture; + data->display = pdev_display; + return 0; + +err_put_pdev_display: + platform_device_put(pdev_display); +err_put_pdev_capture: + platform_device_put(pdev_capture); +err_put_rpm: + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + kfree(data); + + return ret; } static int vpif_remove(struct 
platform_device *pdev) { + struct vpif_data *data = platform_get_drvdata(pdev); + + if (data->capture) + platform_device_unregister(data->capture); + if (data->display) + platform_device_unregister(data->display); + + pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); + + kfree(data); + return 0; } diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg.c b/drivers/media/platform/imx-jpeg/mxc-jpeg.c index 4ca96cf9def76..83a2b4d13bad3 100644 --- a/drivers/media/platform/imx-jpeg/mxc-jpeg.c +++ b/drivers/media/platform/imx-jpeg/mxc-jpeg.c @@ -947,8 +947,13 @@ static void mxc_jpeg_device_run(void *priv) v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, true); jpeg_src_buf = vb2_to_mxc_buf(&src_buf->vb2_buf); + if (q_data_cap->fmt->colplanes != dst_buf->vb2_buf.num_planes) { + dev_err(dev, "Capture format %s has %d planes, but capture buffer has %d planes\n", + q_data_cap->fmt->name, q_data_cap->fmt->colplanes, + dst_buf->vb2_buf.num_planes); + jpeg_src_buf->jpeg_parse_error = true; + } if (jpeg_src_buf->jpeg_parse_error) { - jpeg->slot_data[ctx->slot].used = false; v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); diff --git a/drivers/media/platform/meson/ge2d/ge2d.c b/drivers/media/platform/meson/ge2d/ge2d.c index ccda18e5a3774..5e7b319f300df 100644 --- a/drivers/media/platform/meson/ge2d/ge2d.c +++ b/drivers/media/platform/meson/ge2d/ge2d.c @@ -215,35 +215,35 @@ static void ge2d_hw_start(struct meson_ge2d *ge2d) regmap_write(ge2d->map, GE2D_SRC1_CLIPY_START_END, FIELD_PREP(GE2D_START, ctx->in.crop.top) | - FIELD_PREP(GE2D_END, ctx->in.crop.top + ctx->in.crop.height)); + FIELD_PREP(GE2D_END, ctx->in.crop.top + ctx->in.crop.height - 1)); regmap_write(ge2d->map, GE2D_SRC1_CLIPX_START_END, FIELD_PREP(GE2D_START, ctx->in.crop.left) | - FIELD_PREP(GE2D_END, ctx->in.crop.left + ctx->in.crop.width)); + FIELD_PREP(GE2D_END, ctx->in.crop.left + ctx->in.crop.width - 1)); regmap_write(ge2d->map, GE2D_SRC2_CLIPY_START_END, FIELD_PREP(GE2D_START, ctx->out.crop.top) | - FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height)); + FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height - 1)); regmap_write(ge2d->map, GE2D_SRC2_CLIPX_START_END, FIELD_PREP(GE2D_START, ctx->out.crop.left) | - FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width)); + FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width - 1)); regmap_write(ge2d->map, GE2D_DST_CLIPY_START_END, FIELD_PREP(GE2D_START, ctx->out.crop.top) | - FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height)); + FIELD_PREP(GE2D_END, ctx->out.crop.top + ctx->out.crop.height - 1)); regmap_write(ge2d->map, GE2D_DST_CLIPX_START_END, FIELD_PREP(GE2D_START, ctx->out.crop.left) | - FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width)); + FIELD_PREP(GE2D_END, ctx->out.crop.left + ctx->out.crop.width - 1)); regmap_write(ge2d->map, GE2D_SRC1_Y_START_END, - FIELD_PREP(GE2D_END, ctx->in.pix_fmt.height)); + FIELD_PREP(GE2D_END, ctx->in.pix_fmt.height - 1)); regmap_write(ge2d->map, GE2D_SRC1_X_START_END, - FIELD_PREP(GE2D_END, ctx->in.pix_fmt.width)); + FIELD_PREP(GE2D_END, ctx->in.pix_fmt.width - 1)); regmap_write(ge2d->map, GE2D_SRC2_Y_START_END, - FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height)); + FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height - 1)); regmap_write(ge2d->map, GE2D_SRC2_X_START_END, - FIELD_PREP(GE2D_END, ctx->out.pix_fmt.width)); + FIELD_PREP(GE2D_END, ctx->out.pix_fmt.width - 1)); regmap_write(ge2d->map, GE2D_DST_Y_START_END, - 
FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height)); + FIELD_PREP(GE2D_END, ctx->out.pix_fmt.height - 1)); regmap_write(ge2d->map, GE2D_DST_X_START_END, - FIELD_PREP(GE2D_END, ctx->out.pix_fmt.width)); + FIELD_PREP(GE2D_END, ctx->out.pix_fmt.width - 1)); /* Color, no blend, use source color */ reg = GE2D_ALU_DO_COLOR_OPERATION_LOGIC(LOGIC_OPERATION_COPY, diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c index cd27f637dbe7c..cfc7ebed8fb7a 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c @@ -102,6 +102,8 @@ struct mtk_vcodec_fw *mtk_vcodec_fw_vpu_init(struct mtk_vcodec_dev *dev, vpu_wdt_reg_handler(fw_pdev, mtk_vcodec_vpu_reset_handler, dev, rst_id); fw = devm_kzalloc(&dev->plat_dev->dev, sizeof(*fw), GFP_KERNEL); + if (!fw) + return ERR_PTR(-ENOMEM); fw->type = VPU; fw->ops = &mtk_vcodec_vpu_msg; fw->pdev = fw_pdev; diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c index 5b9b57f4d9bf8..68cf68dbcace2 100644 --- a/drivers/media/platform/omap3isp/ispstat.c +++ b/drivers/media/platform/omap3isp/ispstat.c @@ -512,7 +512,7 @@ int omap3isp_stat_request_statistics(struct ispstat *stat, int omap3isp_stat_request_statistics_time32(struct ispstat *stat, struct omap3isp_stat_data_time32 *data) { - struct omap3isp_stat_data data64; + struct omap3isp_stat_data data64 = { }; int ret; ret = omap3isp_stat_request_statistics(stat, &data64); @@ -521,7 +521,8 @@ int omap3isp_stat_request_statistics_time32(struct ispstat *stat, data->ts.tv_sec = data64.ts.tv_sec; data->ts.tv_usec = data64.ts.tv_usec; - memcpy(&data->buf, &data64.buf, sizeof(*data) - sizeof(data->ts)); + data->buf = (uintptr_t)data64.buf; + memcpy(&data->frame, &data64.frame, sizeof(data->frame)); return 0; } diff --git a/drivers/media/platform/qcom/camss/camss-csid-170.c b/drivers/media/platform/qcom/camss/camss-csid-170.c index ac22ff29d2a9f..82f59933ad7b3 100644 --- a/drivers/media/platform/qcom/camss/camss-csid-170.c +++ b/drivers/media/platform/qcom/camss/camss-csid-170.c @@ -105,7 +105,8 @@ #define CSID_RDI_CTRL(rdi) ((IS_LITE ? 0x208 : 0x308)\ + 0x100 * (rdi)) #define RDI_CTRL_HALT_CMD 0 -#define ALT_CMD_RESUME_AT_FRAME_BOUNDARY 1 +#define HALT_CMD_HALT_AT_FRAME_BOUNDARY 0 +#define HALT_CMD_RESUME_AT_FRAME_BOUNDARY 1 #define RDI_CTRL_HALT_MODE 2 #define CSID_RDI_FRM_DROP_PATTERN(rdi) ((IS_LITE ? 
0x20C : 0x30C)\ @@ -366,7 +367,7 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable) val |= input_format->width & 0x1fff << TPG_DT_n_CFG_0_FRAME_WIDTH; writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_0(0)); - val = DATA_TYPE_RAW_10BIT << TPG_DT_n_CFG_1_DATA_TYPE; + val = format->data_type << TPG_DT_n_CFG_1_DATA_TYPE; writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_1(0)); val = tg->mode << TPG_DT_n_CFG_2_PAYLOAD_MODE; @@ -382,8 +383,9 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable) val = 1 << RDI_CFG0_BYTE_CNTR_EN; val |= 1 << RDI_CFG0_FORMAT_MEASURE_EN; val |= 1 << RDI_CFG0_TIMESTAMP_EN; + /* note: for non-RDI path, this should be format->decode_format */ val |= DECODE_FORMAT_PAYLOAD_ONLY << RDI_CFG0_DECODE_FORMAT; - val |= DATA_TYPE_RAW_10BIT << RDI_CFG0_DATA_TYPE; + val |= format->data_type << RDI_CFG0_DATA_TYPE; val |= vc << RDI_CFG0_VIRTUAL_CHANNEL; val |= dt_id << RDI_CFG0_DT_ID; writel_relaxed(val, csid->base + CSID_RDI_CFG0(0)); @@ -443,13 +445,10 @@ static void csid_configure_stream(struct csid_device *csid, u8 enable) val |= 1 << CSI2_RX_CFG1_MISR_EN; writel_relaxed(val, csid->base + CSID_CSI2_RX_CFG1); // csi2_vc_mode_shift_val ? - /* error irqs start at BIT(11) */ - writel_relaxed(~0u, csid->base + CSID_CSI2_RX_IRQ_MASK); - - /* RDI irq */ - writel_relaxed(~0u, csid->base + CSID_TOP_IRQ_MASK); - - val = 1 << RDI_CTRL_HALT_CMD; + if (enable) + val = HALT_CMD_RESUME_AT_FRAME_BOUNDARY << RDI_CTRL_HALT_CMD; + else + val = HALT_CMD_HALT_AT_FRAME_BOUNDARY << RDI_CTRL_HALT_CMD; writel_relaxed(val, csid->base + CSID_RDI_CTRL(0)); } diff --git a/drivers/media/platform/qcom/camss/camss-vfe-170.c b/drivers/media/platform/qcom/camss/camss-vfe-170.c index f524af712a843..600150cfc4f70 100644 --- a/drivers/media/platform/qcom/camss/camss-vfe-170.c +++ b/drivers/media/platform/qcom/camss/camss-vfe-170.c @@ -395,17 +395,7 @@ static irqreturn_t vfe_isr(int irq, void *dev) */ static int vfe_halt(struct vfe_device *vfe) { - unsigned long time; - - reinit_completion(&vfe->halt_complete); - - time = wait_for_completion_timeout(&vfe->halt_complete, - msecs_to_jiffies(VFE_HALT_TIMEOUT_MS)); - if (!time) { - dev_err(vfe->camss->dev, "VFE halt timeout\n"); - return -EIO; - } - + /* rely on vfe_disable_output() to stop the VFE */ return 0; } diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c index 84c3a511ec31e..0bca95d016507 100644 --- a/drivers/media/platform/qcom/venus/helpers.c +++ b/drivers/media/platform/qcom/venus/helpers.c @@ -189,7 +189,6 @@ int venus_helper_alloc_dpb_bufs(struct venus_inst *inst) buf->va = dma_alloc_attrs(dev, buf->size, &buf->da, GFP_KERNEL, buf->attrs); if (!buf->va) { - kfree(buf); ret = -ENOMEM; goto fail; } @@ -209,6 +208,7 @@ int venus_helper_alloc_dpb_bufs(struct venus_inst *inst) return 0; fail: + kfree(buf); venus_helper_free_dpb_bufs(inst); return ret; } diff --git a/drivers/media/platform/qcom/venus/hfi_cmds.c b/drivers/media/platform/qcom/venus/hfi_cmds.c index 5aea07307e02e..4ecd444050bb6 100644 --- a/drivers/media/platform/qcom/venus/hfi_cmds.c +++ b/drivers/media/platform/qcom/venus/hfi_cmds.c @@ -1054,6 +1054,8 @@ static int pkt_session_set_property_1x(struct hfi_session_set_property_pkt *pkt, pkt->shdr.hdr.size += sizeof(u32) + sizeof(*info); break; } + case HFI_PROPERTY_PARAM_VENC_HDR10_PQ_SEI: + return -ENOTSUPP; /* FOLLOWING PROPERTIES ARE NOT IMPLEMENTED IN CORE YET */ case HFI_PROPERTY_CONFIG_BUFFER_REQUIREMENTS: diff --git 
a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index 84bafc3118cc6..adea4c3b8c204 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -662,8 +662,8 @@ static int venc_set_properties(struct venus_inst *inst)
 		ptype = HFI_PROPERTY_PARAM_VENC_H264_TRANSFORM_8X8;
 		h264_transform.enable_type = 0;
-		if (ctr->profile.h264 == HFI_H264_PROFILE_HIGH ||
-		    ctr->profile.h264 == HFI_H264_PROFILE_CONSTRAINED_HIGH)
+		if (ctr->profile.h264 == V4L2_MPEG_VIDEO_H264_PROFILE_HIGH ||
+		    ctr->profile.h264 == V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH)
 			h264_transform.enable_type = ctr->h264_8x8_transform;
 
 		ret = hfi_session_set_property(inst, ptype, &h264_transform);
diff --git a/drivers/media/platform/qcom/venus/venc_ctrls.c b/drivers/media/platform/qcom/venus/venc_ctrls.c
index 1ada42df314dc..ea5805e71c143 100644
--- a/drivers/media/platform/qcom/venus/venc_ctrls.c
+++ b/drivers/media/platform/qcom/venus/venc_ctrls.c
@@ -320,8 +320,8 @@ static int venc_op_s_ctrl(struct v4l2_ctrl *ctrl)
 		ctr->intra_refresh_period = ctrl->val;
 		break;
 	case V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM:
-		if (ctr->profile.h264 != HFI_H264_PROFILE_HIGH &&
-		    ctr->profile.h264 != HFI_H264_PROFILE_CONSTRAINED_HIGH)
+		if (ctr->profile.h264 != V4L2_MPEG_VIDEO_H264_PROFILE_HIGH &&
+		    ctr->profile.h264 != V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH)
 			return -EINVAL;
 
 		/*
@@ -457,7 +457,7 @@ int venc_ctrl_init(struct venus_inst *inst)
 			  V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MIN_QP, 1, 51, 1, 1);
 
 	v4l2_ctrl_new_std(&inst->ctrl_handler, &venc_ctrl_ops,
-			  V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM, 0, 1, 1, 0);
+			  V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM, 0, 1, 1, 1);
 
 	v4l2_ctrl_new_std(&inst->ctrl_handler, &venc_ctrl_ops,
 			  V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MIN_QP, 1, 51, 1, 1);
diff --git a/drivers/media/platform/ti-vpe/cal-video.c b/drivers/media/platform/ti-vpe/cal-video.c
index 7799da1cc261b..3e936a2ca36c6 100644
--- a/drivers/media/platform/ti-vpe/cal-video.c
+++ b/drivers/media/platform/ti-vpe/cal-video.c
@@ -823,6 +823,9 @@ static int cal_ctx_v4l2_init_formats(struct cal_ctx *ctx)
 	/* Enumerate sub device formats and enable all matching local formats */
 	ctx->active_fmt = devm_kcalloc(ctx->cal->dev, cal_num_formats,
 				       sizeof(*ctx->active_fmt), GFP_KERNEL);
+	if (!ctx->active_fmt)
+		return -ENOMEM;
+
 	ctx->num_active_fmt = 0;
 
 	for (j = 0, i = 0; ; ++j) {
diff --git a/drivers/media/rc/gpio-ir-tx.c b/drivers/media/rc/gpio-ir-tx.c
index c6cd2e6d8e654..a50701cfbbd7b 100644
--- a/drivers/media/rc/gpio-ir-tx.c
+++ b/drivers/media/rc/gpio-ir-tx.c
@@ -48,11 +48,29 @@ static int gpio_ir_tx_set_carrier(struct rc_dev *dev, u32 carrier)
 	return 0;
 }
 
+static void delay_until(ktime_t until)
+{
+	/*
+	 * delta should never exceed 0.5 seconds (IR_MAX_DURATION) and on
+	 * m68k ndelay(s64) does not compile; so use s32 rather than s64.
+	 */
+ */ + s32 delta; + + while (true) { + delta = ktime_us_delta(until, ktime_get()); + if (delta <= 0) + return; + + /* udelay more than 1ms may not work */ + delta = min(delta, 1000); + udelay(delta); + } +} + static void gpio_ir_tx_unmodulated(struct gpio_ir *gpio_ir, uint *txbuf, uint count) { ktime_t edge; - s32 delta; int i; local_irq_disable(); @@ -63,9 +81,7 @@ static void gpio_ir_tx_unmodulated(struct gpio_ir *gpio_ir, uint *txbuf, gpiod_set_value(gpio_ir->gpio, !(i % 2)); edge = ktime_add_us(edge, txbuf[i]); - delta = ktime_us_delta(edge, ktime_get()); - if (delta > 0) - udelay(delta); + delay_until(edge); } gpiod_set_value(gpio_ir->gpio, 0); @@ -97,9 +113,7 @@ static void gpio_ir_tx_modulated(struct gpio_ir *gpio_ir, uint *txbuf, if (i % 2) { // space edge = ktime_add_us(edge, txbuf[i]); - delta = ktime_us_delta(edge, ktime_get()); - if (delta > 0) - udelay(delta); + delay_until(edge); } else { // pulse ktime_t last = ktime_add_us(edge, txbuf[i]); diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index 7e98e7e3aacec..1968067092594 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -458,7 +458,7 @@ static int irtoy_probe(struct usb_interface *intf, err = usb_submit_urb(irtoy->urb_in, GFP_KERNEL); if (err != 0) { dev_err(irtoy->dev, "fail to submit in urb: %d\n", err); - return err; + goto free_rcdev; } err = irtoy_setup(irtoy); diff --git a/drivers/media/test-drivers/vidtv/vidtv_s302m.c b/drivers/media/test-drivers/vidtv/vidtv_s302m.c index d79b65854627c..4676083cee3b8 100644 --- a/drivers/media/test-drivers/vidtv/vidtv_s302m.c +++ b/drivers/media/test-drivers/vidtv/vidtv_s302m.c @@ -455,6 +455,9 @@ struct vidtv_encoder e->name = kstrdup(args.name, GFP_KERNEL); e->encoder_buf = vzalloc(VIDTV_S302M_BUF_SZ); + if (!e->encoder_buf) + goto out_kfree_e; + e->encoder_buf_sz = VIDTV_S302M_BUF_SZ; e->encoder_buf_offset = 0; @@ -467,10 +470,8 @@ struct vidtv_encoder e->is_video_encoder = false; ctx = kzalloc(priv_sz, GFP_KERNEL); - if (!ctx) { - kfree(e); - return NULL; - } + if (!ctx) + goto out_kfree_buf; e->ctx = ctx; ctx->last_duration = 0; @@ -498,6 +499,14 @@ struct vidtv_encoder e->next = NULL; return e; + +out_kfree_buf: + kfree(e->encoder_buf); + +out_kfree_e: + kfree(e->name); + kfree(e); + return NULL; } void vidtv_s302m_encoder_destroy(struct vidtv_encoder *e) diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c index b451ce3cb169a..ae25d2cbfdfee 100644 --- a/drivers/media/usb/em28xx/em28xx-cards.c +++ b/drivers/media/usb/em28xx/em28xx-cards.c @@ -3936,6 +3936,8 @@ static int em28xx_usb_probe(struct usb_interface *intf, goto err_free; } + kref_init(&dev->ref); + dev->devno = nr; dev->model = id->driver_info; dev->alt = -1; @@ -4036,6 +4038,8 @@ static int em28xx_usb_probe(struct usb_interface *intf, } if (dev->board.has_dual_ts && em28xx_duplicate_dev(dev) == 0) { + kref_init(&dev->dev_next->ref); + dev->dev_next->ts = SECONDARY_TS; dev->dev_next->alt = -1; dev->dev_next->is_audio_only = has_vendor_audio && @@ -4090,12 +4094,8 @@ static int em28xx_usb_probe(struct usb_interface *intf, em28xx_write_reg(dev, 0x0b, 0x82); mdelay(100); } - - kref_init(&dev->dev_next->ref); } - kref_init(&dev->ref); - request_modules(dev); /* @@ -4150,11 +4150,8 @@ static void em28xx_usb_disconnect(struct usb_interface *intf) em28xx_close_extension(dev); - if (dev->dev_next) { - em28xx_close_extension(dev->dev_next); + if (dev->dev_next) em28xx_release_resources(dev->dev_next); - } - em28xx_release_resources(dev); if 
(dev->dev_next) { diff --git a/drivers/media/usb/go7007/s2250-board.c b/drivers/media/usb/go7007/s2250-board.c index c742cc88fac5c..1fa6f10ee157b 100644 --- a/drivers/media/usb/go7007/s2250-board.c +++ b/drivers/media/usb/go7007/s2250-board.c @@ -504,6 +504,7 @@ static int s2250_probe(struct i2c_client *client, u8 *data; struct go7007 *go = i2c_get_adapdata(adapter); struct go7007_usb *usb = go->hpi_context; + int err = -EIO; audio = i2c_new_dummy_device(adapter, TLV320_ADDRESS >> 1); if (IS_ERR(audio)) @@ -532,11 +533,8 @@ static int s2250_probe(struct i2c_client *client, V4L2_CID_HUE, -512, 511, 1, 0); sd->ctrl_handler = &state->hdl; if (state->hdl.error) { - int err = state->hdl.error; - - v4l2_ctrl_handler_free(&state->hdl); - kfree(state); - return err; + err = state->hdl.error; + goto fail; } state->std = V4L2_STD_NTSC; @@ -600,7 +598,7 @@ static int s2250_probe(struct i2c_client *client, i2c_unregister_device(audio); v4l2_ctrl_handler_free(&state->hdl); kfree(state); - return -EIO; + return err; } static int s2250_remove(struct i2c_client *client) diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c index 563128d117317..60e57e0f19272 100644 --- a/drivers/media/usb/hdpvr/hdpvr-video.c +++ b/drivers/media/usb/hdpvr/hdpvr-video.c @@ -308,7 +308,6 @@ static int hdpvr_start_streaming(struct hdpvr_device *dev) dev->status = STATUS_STREAMING; - INIT_WORK(&dev->worker, hdpvr_transmit_buffers); schedule_work(&dev->worker); v4l2_dbg(MSG_BUFFER, hdpvr_debug, &dev->v4l2_dev, @@ -1165,6 +1164,9 @@ int hdpvr_register_videodev(struct hdpvr_device *dev, struct device *parent, bool ac3 = dev->flags & HDPVR_FLAG_AC3_CAP; int res; + // initialize dev->worker + INIT_WORK(&dev->worker, hdpvr_transmit_buffers); + dev->cur_std = V4L2_STD_525_60; dev->width = 720; dev->height = 480; diff --git a/drivers/media/usb/stk1160/stk1160-core.c b/drivers/media/usb/stk1160/stk1160-core.c index 4e1698f788187..ce717502ea4c3 100644 --- a/drivers/media/usb/stk1160/stk1160-core.c +++ b/drivers/media/usb/stk1160/stk1160-core.c @@ -403,7 +403,7 @@ static void stk1160_disconnect(struct usb_interface *interface) /* Here is the only place where isoc get released */ stk1160_uninit_isoc(dev); - stk1160_clear_queue(dev); + stk1160_clear_queue(dev, VB2_BUF_STATE_ERROR); video_unregister_device(&dev->vdev); v4l2_device_disconnect(&dev->v4l2_dev); diff --git a/drivers/media/usb/stk1160/stk1160-v4l.c b/drivers/media/usb/stk1160/stk1160-v4l.c index 6a4eb616d5160..1aa953469402f 100644 --- a/drivers/media/usb/stk1160/stk1160-v4l.c +++ b/drivers/media/usb/stk1160/stk1160-v4l.c @@ -258,7 +258,7 @@ static int stk1160_start_streaming(struct stk1160 *dev) stk1160_uninit_isoc(dev); out_stop_hw: usb_set_interface(dev->udev, 0, 0); - stk1160_clear_queue(dev); + stk1160_clear_queue(dev, VB2_BUF_STATE_QUEUED); mutex_unlock(&dev->v4l_lock); @@ -306,7 +306,7 @@ static int stk1160_stop_streaming(struct stk1160 *dev) stk1160_stop_hw(dev); - stk1160_clear_queue(dev); + stk1160_clear_queue(dev, VB2_BUF_STATE_ERROR); stk1160_dbg("streaming stopped\n"); @@ -745,7 +745,7 @@ static const struct video_device v4l_template = { /********************************************************************/ /* Must be called with both v4l_lock and vb_queue_lock hold */ -void stk1160_clear_queue(struct stk1160 *dev) +void stk1160_clear_queue(struct stk1160 *dev, enum vb2_buffer_state vb2_state) { struct stk1160_buffer *buf; unsigned long flags; @@ -756,7 +756,7 @@ void stk1160_clear_queue(struct stk1160 *dev) buf = 
list_first_entry(&dev->avail_bufs, struct stk1160_buffer, list); list_del(&buf->list); - vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); + vb2_buffer_done(&buf->vb.vb2_buf, vb2_state); stk1160_dbg("buffer [%p/%d] aborted\n", buf, buf->vb.vb2_buf.index); } @@ -766,7 +766,7 @@ void stk1160_clear_queue(struct stk1160 *dev) buf = dev->isoc_ctl.buf; dev->isoc_ctl.buf = NULL; - vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); + vb2_buffer_done(&buf->vb.vb2_buf, vb2_state); stk1160_dbg("buffer [%p/%d] aborted\n", buf, buf->vb.vb2_buf.index); } diff --git a/drivers/media/usb/stk1160/stk1160.h b/drivers/media/usb/stk1160/stk1160.h index a31ea1c80f255..a70963ce87533 100644 --- a/drivers/media/usb/stk1160/stk1160.h +++ b/drivers/media/usb/stk1160/stk1160.h @@ -166,7 +166,7 @@ struct regval { int stk1160_vb2_setup(struct stk1160 *dev); int stk1160_video_register(struct stk1160 *dev); void stk1160_video_unregister(struct stk1160 *dev); -void stk1160_clear_queue(struct stk1160 *dev); +void stk1160_clear_queue(struct stk1160 *dev, enum vb2_buffer_state vb2_state); /* Provided by stk1160-video.c */ int stk1160_alloc_isoc(struct stk1160 *dev); diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig index 6ee75c6c820e3..7f08e2b5a5041 100644 --- a/drivers/media/v4l2-core/Kconfig +++ b/drivers/media/v4l2-core/Kconfig @@ -48,6 +48,11 @@ config VIDEO_TUNER config V4L2_JPEG_HELPER tristate +config V4L2_LOOPBACK + tristate "V4L2 loopback device" + help + V4L2 loopback device + # Used by drivers that need v4l2-h264.ko config V4L2_H264 tristate diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile index 83fac5c746f54..9d7f1d0a331b0 100644 --- a/drivers/media/v4l2-core/Makefile +++ b/drivers/media/v4l2-core/Makefile @@ -30,6 +30,8 @@ obj-$(CONFIG_V4L2_FLASH_LED_CLASS) += v4l2-flash-led-class.o obj-$(CONFIG_V4L2_JPEG_HELPER) += v4l2-jpeg.o +obj-$(CONFIG_V4L2_LOOPBACK) += v4l2loopback.o + obj-$(CONFIG_VIDEOBUF_GEN) += videobuf-core.o obj-$(CONFIG_VIDEOBUF_DMA_SG) += videobuf-dma-sg.o obj-$(CONFIG_VIDEOBUF_DMA_CONTIG) += videobuf-dma-contig.o diff --git a/drivers/media/v4l2-core/v4l2-ctrls-core.c b/drivers/media/v4l2-core/v4l2-ctrls-core.c index 54abe5245dcc4..df8cff47a7fb5 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls-core.c +++ b/drivers/media/v4l2-core/v4l2-ctrls-core.c @@ -112,7 +112,9 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx, struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture; struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quant; struct v4l2_ctrl_vp8_frame *p_vp8_frame; + struct v4l2_ctrl_vp9_frame *p_vp9_frame; struct v4l2_ctrl_fwht_params *p_fwht_params; + struct v4l2_ctrl_h264_scaling_matrix *p_h264_scaling_matrix; void *p = ptr.p + idx * ctrl->elem_size; if (ctrl->p_def.p_const) @@ -152,6 +154,13 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx, p_vp8_frame = p; p_vp8_frame->num_dct_parts = 1; break; + case V4L2_CTRL_TYPE_VP9_FRAME: + p_vp9_frame = p; + p_vp9_frame->profile = 0; + p_vp9_frame->bit_depth = 8; + p_vp9_frame->flags |= V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING | + V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING; + break; case V4L2_CTRL_TYPE_FWHT_PARAMS: p_fwht_params = p; p_fwht_params->version = V4L2_FWHT_VERSION; @@ -160,6 +169,15 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx, p_fwht_params->flags = V4L2_FWHT_FL_PIXENC_YUV | (2 << V4L2_FWHT_FL_COMPONENTS_NUM_OFFSET); break; + case V4L2_CTRL_TYPE_H264_SCALING_MATRIX: + p_h264_scaling_matrix = p; + /* + * The default (flat) H.264 
scaling matrix when none are + * specified in the bitstream, this is according to formulas + * (7-8) and (7-9) of the specification. + */ + memset(p_h264_scaling_matrix, 16, sizeof(*p_h264_scaling_matrix)); + break; } } diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 9ac557b8e1467..642cb90f457c6 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -279,8 +279,8 @@ static void v4l_print_format(const void *arg, bool write_only) const struct v4l2_vbi_format *vbi; const struct v4l2_sliced_vbi_format *sliced; const struct v4l2_window *win; - const struct v4l2_sdr_format *sdr; const struct v4l2_meta_format *meta; + u32 pixelformat; u32 planes; unsigned i; @@ -299,8 +299,9 @@ static void v4l_print_format(const void *arg, bool write_only) case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: mp = &p->fmt.pix_mp; + pixelformat = mp->pixelformat; pr_cont(", width=%u, height=%u, format=%p4cc, field=%s, colorspace=%d, num_planes=%u, flags=0x%x, ycbcr_enc=%u, quantization=%u, xfer_func=%u\n", - mp->width, mp->height, &mp->pixelformat, + mp->width, mp->height, &pixelformat, prt_names(mp->field, v4l2_field_names), mp->colorspace, mp->num_planes, mp->flags, mp->ycbcr_enc, mp->quantization, mp->xfer_func); @@ -343,14 +344,15 @@ static void v4l_print_format(const void *arg, bool write_only) break; case V4L2_BUF_TYPE_SDR_CAPTURE: case V4L2_BUF_TYPE_SDR_OUTPUT: - sdr = &p->fmt.sdr; - pr_cont(", pixelformat=%p4cc\n", &sdr->pixelformat); + pixelformat = p->fmt.sdr.pixelformat; + pr_cont(", pixelformat=%p4cc\n", &pixelformat); break; case V4L2_BUF_TYPE_META_CAPTURE: case V4L2_BUF_TYPE_META_OUTPUT: meta = &p->fmt.meta; + pixelformat = meta->dataformat; pr_cont(", dataformat=%p4cc, buffersize=%u\n", - &meta->dataformat, meta->buffersize); + &pixelformat, meta->buffersize); break; } } diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c index e2654b422334c..675e22895ebe6 100644 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -585,19 +585,14 @@ int v4l2_m2m_reqbufs(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, } EXPORT_SYMBOL_GPL(v4l2_m2m_reqbufs); -int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, - struct v4l2_buffer *buf) +static void v4l2_m2m_adjust_mem_offset(struct vb2_queue *vq, + struct v4l2_buffer *buf) { - struct vb2_queue *vq; - int ret = 0; - unsigned int i; - - vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - ret = vb2_querybuf(vq, buf); - /* Adjust MMAP memory offsets for the CAPTURE queue */ if (buf->memory == V4L2_MEMORY_MMAP && V4L2_TYPE_IS_CAPTURE(vq->type)) { if (V4L2_TYPE_IS_MULTIPLANAR(vq->type)) { + unsigned int i; + for (i = 0; i < buf->length; ++i) buf->m.planes[i].m.mem_offset += DST_QUEUE_OFF_BASE; @@ -605,8 +600,23 @@ int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, buf->m.offset += DST_QUEUE_OFF_BASE; } } +} - return ret; +int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, + struct v4l2_buffer *buf) +{ + struct vb2_queue *vq; + int ret; + + vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); + ret = vb2_querybuf(vq, buf); + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_querybuf); @@ -763,6 +773,9 @@ int v4l2_m2m_qbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, if (ret) return ret; + /* Adjust MMAP memory offsets for the 
CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + /* * If the capture queue is streaming, but streaming hasn't started * on the device, but was asked to stop, mark the previously queued @@ -784,9 +797,17 @@ int v4l2_m2m_dqbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_buffer *buf) { struct vb2_queue *vq; + int ret; vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - return vb2_dqbuf(vq, buf, file->f_flags & O_NONBLOCK); + ret = vb2_dqbuf(vq, buf, file->f_flags & O_NONBLOCK); + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_dqbuf); @@ -795,9 +816,17 @@ int v4l2_m2m_prepare_buf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, { struct video_device *vdev = video_devdata(file); struct vb2_queue *vq; + int ret; vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - return vb2_prepare_buf(vq, vdev->v4l2_dev->mdev, buf); + ret = vb2_prepare_buf(vq, vdev->v4l2_dev->mdev, buf); + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_prepare_buf); diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c new file mode 100644 index 0000000000000..b1c747f7d13c6 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.c @@ -0,0 +1,2914 @@ +/* -*- c-file-style: "linux" -*- */ +/* + * v4l2loopback.c -- video4linux2 loopback driver + * + * Copyright (C) 2005-2009 Vasily Levin (vasaka@gmail.com) + * Copyright (C) 2010-2019 IOhannes m zmoelnig (zmoelnig@iem.at) + * Copyright (C) 2011 Stefan Diewald (stefan.diewald@mytum.de) + * Copyright (C) 2012 Anton Novikov (random.plant@gmail.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) +#define HAVE__V4L2_DEVICE +#include +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36) +#define HAVE__V4L2_CTRLS +#include +#endif +#include + +#include +#include "v4l2loopback.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 6, 1) +#define kstrtoul strict_strtoul +#endif + +#if defined(timer_setup) && defined(from_timer) +#define HAVE_TIMER_SETUP +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) +#define VFL_TYPE_VIDEO VFL_TYPE_GRABBER +#endif + +#define V4L2LOOPBACK_VERSION_CODE \ + KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \ + V4L2LOOPBACK_VERSION_BUGFIX) + +MODULE_DESCRIPTION("V4L2 loopback video device"); +MODULE_AUTHOR("Vasily Levin, " + "IOhannes m zmoelnig ," + "Stefan Diewald," + "Anton Novikov" + "et al."); +MODULE_VERSION("0.12.5"); +MODULE_LICENSE("GPL"); + +/* + * helpers + */ +#define STRINGIFY(s) #s +#define STRINGIFY2(s) STRINGIFY(s) + +#define dprintk(fmt, args...) \ + do { \ + if (debug > 0) { \ + printk(KERN_INFO "v4l2-loopback[" STRINGIFY2( \ + __LINE__) "]: " fmt, \ + ##args); \ + } \ + } while (0) + +#define MARK() \ + do { \ + if (debug > 1) { \ + printk(KERN_INFO "%s:%d[%s]\n", __FILE__, __LINE__, \ + __func__); \ + } \ + } while (0) + +#define dprintkrw(fmt, args...) 
\
+	do { \
+		if (debug > 2) { \
+			printk(KERN_INFO "v4l2-loopback[" STRINGIFY2( \
+				__LINE__) "]: " fmt, \
+			       ##args); \
+		} \
+	} while (0)
+
+/*
+ * compatibility hacks
+ */
+
+#ifndef HAVE__V4L2_CTRLS
+struct v4l2_ctrl_handler {
+	int error;
+};
+struct v4l2_ctrl_config {
+	void *ops;
+	u32 id;
+	const char *name;
+	int type;
+	s32 min;
+	s32 max;
+	u32 step;
+	s32 def;
+};
+int v4l2_ctrl_handler_init(struct v4l2_ctrl_handler *hdl,
+			   unsigned nr_of_controls_hint)
+{
+	hdl->error = 0;
+	return 0;
+}
+void v4l2_ctrl_handler_free(struct v4l2_ctrl_handler *hdl)
+{
+}
+void *v4l2_ctrl_new_custom(struct v4l2_ctrl_handler *hdl,
+			   const struct v4l2_ctrl_config *conf, void *priv)
+{
+	return NULL;
+}
+#endif /* HAVE__V4L2_CTRLS */
+
+#ifndef HAVE__V4L2_DEVICE
+/* dummy v4l2_device struct/functions */
+#define V4L2_DEVICE_NAME_SIZE (20 + 16)
+struct v4l2_device {
+	char name[V4L2_DEVICE_NAME_SIZE];
+	struct v4l2_ctrl_handler *ctrl_handler;
+};
+static inline int v4l2_device_register(void *dev, void *v4l2_dev)
+{
+	return 0;
+}
+static inline void v4l2_device_unregister(struct v4l2_device *v4l2_dev)
+{
+	return;
+}
+#endif /* HAVE__V4L2_DEVICE */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
+#define v4l2_file_operations file_operations
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37)
+void *v4l2l_vzalloc(unsigned long size)
+{
+	void *data = vmalloc(size);
+
+	memset(data, 0, size);
+	return data;
+}
+#else
+#define v4l2l_vzalloc vzalloc
+#endif
+
+static inline void v4l2l_get_timestamp(struct v4l2_buffer *b)
+{
+	/* ktime_get_ts is considered deprecated, so use ktime_get_ts64 if possible */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+	struct timespec ts;
+	ktime_get_ts(&ts);
+#else
+	struct timespec64 ts;
+	ktime_get_ts64(&ts);
+#endif
+
+	b->timestamp.tv_sec = ts.tv_sec;
+	b->timestamp.tv_usec = (ts.tv_nsec / NSEC_PER_USEC);
+}
+
+#if !defined(__poll_t)
+typedef unsigned __poll_t;
+#endif
+
+/* module constants
+ * can be overridden during the build process using something like
+ * make KCPPFLAGS="-DMAX_DEVICES=100"
+ */
+
+/* maximum number of v4l2loopback devices that can be created */
+#ifndef MAX_DEVICES
+#define MAX_DEVICES 8
+#endif
+
+/* whether the default is to announce capabilities exclusively or not */
+#ifndef V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS
+#define V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS 0
+#endif
+
+/* when a producer is considered to have gone stale */
+#ifndef MAX_TIMEOUT
+#define MAX_TIMEOUT (100 * 1000) /* in msecs */
+#endif
+
+/* max buffers that can be mapped, actually they
+ * are all mapped to max_buffers buffers */
+#ifndef MAX_BUFFERS
+#define MAX_BUFFERS 32
+#endif
+
+/* module parameters */
+static int debug = 0;
+module_param(debug, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "debugging level (higher values == more verbose)");
+
+#define V4L2LOOPBACK_DEFAULT_MAX_BUFFERS 2
+static int max_buffers = V4L2LOOPBACK_DEFAULT_MAX_BUFFERS;
+module_param(max_buffers, int, S_IRUGO);
+MODULE_PARM_DESC(max_buffers,
+		 "how many buffers should be allocated [DEFAULT: " STRINGIFY2(
+			 V4L2LOOPBACK_DEFAULT_MAX_BUFFERS) "]");
+
+/* how many times a device can be opened
+ * the per-module default value can be overridden on a per-device basis using
+ * the /sys/devices interface
+ *
+ * note that max_openers should be at least 2 in order to get a working system:
+ * one opener for the producer and one opener for the consumer
+ * however, we leave that to the user
+ */
+#define V4L2LOOPBACK_DEFAULT_MAX_OPENERS 10
+static int max_openers =
V4L2LOOPBACK_DEFAULT_MAX_OPENERS; +module_param(max_openers, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC( + max_openers, + "how many users can open the loopback device [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_DEFAULT_MAX_OPENERS) "]"); + +static int devices = -1; +module_param(devices, int, 0); +MODULE_PARM_DESC(devices, "how many devices should be created"); + +static int video_nr[MAX_DEVICES] = { [0 ...(MAX_DEVICES - 1)] = -1 }; +module_param_array(video_nr, int, NULL, 0444); +MODULE_PARM_DESC(video_nr, + "video device numbers (-1=auto, 0=/dev/video0, etc.)"); + +static char *card_label[MAX_DEVICES]; +module_param_array(card_label, charp, NULL, 0000); +MODULE_PARM_DESC(card_label, "card labels for each device"); + +static bool exclusive_caps[MAX_DEVICES] = { + [0 ...(MAX_DEVICES - 1)] = V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +}; +module_param_array(exclusive_caps, bool, NULL, 0444); +/* FIXXME: wording */ +MODULE_PARM_DESC( + exclusive_caps, + "whether to announce OUTPUT/CAPTURE capabilities exclusively or not [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS) "]"); + +/* format specifications */ +#define V4L2LOOPBACK_SIZE_MIN_WIDTH 48 +#define V4L2LOOPBACK_SIZE_MIN_HEIGHT 32 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH 8192 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT 8192 + +#define V4L2LOOPBACK_SIZE_DEFAULT_WIDTH 640 +#define V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT 480 + +static int max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; +module_param(max_width, int, S_IRUGO); +MODULE_PARM_DESC(max_width, "maximum allowed frame width [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH) "]"); +static int max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; +module_param(max_height, int, S_IRUGO); +MODULE_PARM_DESC(max_height, + "maximum allowed frame height [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT) "]"); + +static DEFINE_IDR(v4l2loopback_index_idr); +static DEFINE_MUTEX(v4l2loopback_ctl_mutex); + +/* control IDs */ +#ifndef HAVE__V4L2_CTRLS +#define V4L2LOOPBACK_CID_BASE (V4L2_CID_PRIVATE_BASE) +#else +#define V4L2LOOPBACK_CID_BASE (V4L2_CID_USER_BASE | 0xf000) +#endif +#define CID_KEEP_FORMAT (V4L2LOOPBACK_CID_BASE + 0) +#define CID_SUSTAIN_FRAMERATE (V4L2LOOPBACK_CID_BASE + 1) +#define CID_TIMEOUT (V4L2LOOPBACK_CID_BASE + 2) +#define CID_TIMEOUT_IMAGE_IO (V4L2LOOPBACK_CID_BASE + 3) + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl); +static const struct v4l2_ctrl_ops v4l2loopback_ctrl_ops = { + .s_ctrl = v4l2loopback_s_ctrl, +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_keepformat = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_KEEP_FORMAT, + .name = "keep_format", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_sustainframerate = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_SUSTAIN_FRAMERATE, + .name = "sustain_framerate", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeout = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT, + .name = "timeout", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = MAX_TIMEOUT, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeoutimageio = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT_IMAGE_IO, + .name = 
"timeout_image_io", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; + +/* module structures */ +struct v4l2loopback_private { + int device_nr; +}; + +/* TODO(vasaka) use typenames which are common to kernel, but first find out if + * it is needed */ +/* struct keeping state and settings of loopback device */ + +struct v4l2l_buffer { + struct v4l2_buffer buffer; + struct list_head list_head; + int use_count; +}; + +struct v4l2_loopback_device { + struct v4l2_device v4l2_dev; + struct v4l2_ctrl_handler ctrl_handler; + struct video_device *vdev; + /* pixel and stream format */ + struct v4l2_pix_format pix_format; + struct v4l2_captureparm capture_param; + unsigned long frame_jiffies; + + /* ctrls */ + int keep_format; /* CID_KEEP_FORMAT; stay ready_for_capture even when all + openers close() the device */ + int sustain_framerate; /* CID_SUSTAIN_FRAMERATE; duplicate frames to maintain + (close to) nominal framerate */ + + /* buffers stuff */ + u8 *image; /* pointer to actual buffers data */ + unsigned long int imagesize; /* size of buffers data */ + int buffers_number; /* should not be big, 4 is a good choice */ + struct v4l2l_buffer buffers[MAX_BUFFERS]; /* inner driver buffers */ + int used_buffers; /* number of the actually used buffers */ + int max_openers; /* how many times can this device be opened */ + + int write_position; /* number of last written frame + 1 */ + struct list_head outbufs_list; /* buffers in output DQBUF order */ + int bufpos2index + [MAX_BUFFERS]; /* mapping of (read/write_position % used_buffers) + * to inner buffer index */ + long buffer_size; + + /* sustain_framerate stuff */ + struct timer_list sustain_timer; + unsigned int reread_count; + + /* timeout stuff */ + unsigned long timeout_jiffies; /* CID_TIMEOUT; 0 means disabled */ + int timeout_image_io; /* CID_TIMEOUT_IMAGE_IO; next opener will + * read/write to timeout_image */ + u8 *timeout_image; /* copy of it will be captured when timeout passes */ + struct v4l2l_buffer timeout_image_buffer; + struct timer_list timeout_timer; + int timeout_happened; + + /* sync stuff */ + atomic_t open_count; + + int ready_for_capture; /* set to the number of writers that opened the + * device and negotiated format. */ + int ready_for_output; /* set to true when no writer is currently attached + * this differs slightly from !ready_for_capture, + * e.g. when using fallback images */ + int announce_all_caps; /* set to false, if device caps (OUTPUT/CAPTURE) + * should only be announced if the resp. 
"ready" + * flag is set; default=TRUE */ + + int max_width; + int max_height; + + char card_label[32]; + + wait_queue_head_t read_event; + spinlock_t lock; +}; + +/* types of opener shows what opener wants to do with loopback */ +enum opener_type { + // clang-format off + UNNEGOTIATED = 0, + READER = 1, + WRITER = 2, + // clang-format on +}; + +/* struct keeping state and type of opener */ +struct v4l2_loopback_opener { + enum opener_type type; + int vidioc_enum_frameintervals_calls; + int read_position; /* number of last processed frame + 1 or + * write_position - 1 if reader went out of sync */ + unsigned int reread_count; + struct v4l2_buffer *buffers; + int buffers_number; /* should not be big, 4 is a good choice */ + int timeout_image_io; + + struct v4l2_fh fh; +}; + +#define fh_to_opener(ptr) container_of((ptr), struct v4l2_loopback_opener, fh) + +/* this is heavily inspired by the bttv driver found in the linux kernel */ +struct v4l2l_format { + char *name; + int fourcc; /* video4linux 2 */ + int depth; /* bit/pixel */ + int flags; +}; +/* set the v4l2l_format.flags to PLANAR for non-packed formats */ +#define FORMAT_FLAGS_PLANAR 0x01 +#define FORMAT_FLAGS_COMPRESSED 0x02 + +#include "v4l2loopback_formats.h" + +static const unsigned int FORMATS = ARRAY_SIZE(formats); + +static char *fourcc2str(unsigned int fourcc, char buf[4]) +{ + buf[0] = (fourcc >> 0) & 0xFF; + buf[1] = (fourcc >> 8) & 0xFF; + buf[2] = (fourcc >> 16) & 0xFF; + buf[3] = (fourcc >> 24) & 0xFF; + + return buf; +} + +static const struct v4l2l_format *format_by_fourcc(int fourcc) +{ + unsigned int i; + + for (i = 0; i < FORMATS; i++) { + if (formats[i].fourcc == fourcc) + return formats + i; + } + + dprintk("unsupported format '%c%c%c%c'\n", (fourcc >> 0) & 0xFF, + (fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF, + (fourcc >> 24) & 0xFF); + return NULL; +} + +static void pix_format_set_size(struct v4l2_pix_format *f, + const struct v4l2l_format *fmt, + unsigned int width, unsigned int height) +{ + f->width = width; + f->height = height; + + if (fmt->flags & FORMAT_FLAGS_PLANAR) { + f->bytesperline = width; /* Y plane */ + f->sizeimage = (width * height * fmt->depth) >> 3; + } else if (fmt->flags & FORMAT_FLAGS_COMPRESSED) { + /* doesn't make sense for compressed formats */ + f->bytesperline = 0; + f->sizeimage = (width * height * fmt->depth) >> 3; + } else { + f->bytesperline = (width * fmt->depth) >> 3; + f->sizeimage = height * f->bytesperline; + } +} + +static int set_timeperframe(struct v4l2_loopback_device *dev, + struct v4l2_fract *tpf) +{ + if ((tpf->denominator < 1) || (tpf->numerator < 1)) { + return -EINVAL; + } + dev->capture_param.timeperframe = *tpf; + dev->frame_jiffies = max(1UL, msecs_to_jiffies(1000) * tpf->numerator / + tpf->denominator); + return 0; +} + +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd); + +/* device attributes */ +/* available via sysfs: /sys/devices/virtual/video4linux/video* */ + +static ssize_t attr_show_format(struct device *cd, + struct device_attribute *attr, char *buf) +{ + /* gets the current format as "FOURCC:WxH@f/s", e.g. 
"YUYV:320x240@1000/30" */ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + const struct v4l2_fract *tpf; + char buf4cc[5], buf_fps[32]; + + if (!dev || !dev->ready_for_capture) + return 0; + tpf = &dev->capture_param.timeperframe; + + fourcc2str(dev->pix_format.pixelformat, buf4cc); + buf4cc[4] = 0; + if (tpf->numerator == 1) + snprintf(buf_fps, sizeof(buf_fps), "%d", tpf->denominator); + else + snprintf(buf_fps, sizeof(buf_fps), "%d/%d", tpf->denominator, + tpf->numerator); + return sprintf(buf, "%4s:%dx%d@%s\n", buf4cc, dev->pix_format.width, + dev->pix_format.height, buf_fps); +} + +static ssize_t attr_store_format(struct device *cd, + struct device_attribute *attr, const char *buf, + size_t len) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + int fps_num = 0, fps_den = 1; + + if (!dev) + return -ENODEV; + + /* only fps changing is supported */ + if (sscanf(buf, "@%d/%d", &fps_num, &fps_den) > 0) { + struct v4l2_fract f = { .numerator = fps_den, + .denominator = fps_num }; + int err = 0; + if ((err = set_timeperframe(dev, &f)) < 0) + return err; + return len; + } + return -EINVAL; +} + +static DEVICE_ATTR(format, S_IRUGO | S_IWUSR, attr_show_format, + attr_store_format); + +static ssize_t attr_show_buffers(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + return sprintf(buf, "%d\n", dev->used_buffers); +} + +static DEVICE_ATTR(buffers, S_IRUGO, attr_show_buffers, NULL); + +static ssize_t attr_show_maxopeners(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + return sprintf(buf, "%d\n", dev->max_openers); +} + +static ssize_t attr_store_maxopeners(struct device *cd, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct v4l2_loopback_device *dev = NULL; + unsigned long curr = 0; + + if (kstrtoul(buf, 0, &curr)) + return -EINVAL; + + dev = v4l2loopback_cd2dev(cd); + + if (dev->max_openers == curr) + return len; + + if (dev->open_count.counter > curr) { + /* request to limit to less openers as are currently attached to us */ + return -EINVAL; + } + + dev->max_openers = (int)curr; + + return len; +} + +static DEVICE_ATTR(max_openers, S_IRUGO | S_IWUSR, attr_show_maxopeners, + attr_store_maxopeners); + +static void v4l2loopback_remove_sysfs(struct video_device *vdev) +{ +#define V4L2_SYSFS_DESTROY(x) device_remove_file(&vdev->dev, &dev_attr_##x) + + if (vdev) { + V4L2_SYSFS_DESTROY(format); + V4L2_SYSFS_DESTROY(buffers); + V4L2_SYSFS_DESTROY(max_openers); + /* ... */ + } +} + +static void v4l2loopback_create_sysfs(struct video_device *vdev) +{ + int res = 0; + +#define V4L2_SYSFS_CREATE(x) \ + res = device_create_file(&vdev->dev, &dev_attr_##x); \ + if (res < 0) \ + break + if (!vdev) + return; + do { + V4L2_SYSFS_CREATE(format); + V4L2_SYSFS_CREATE(buffers); + V4L2_SYSFS_CREATE(max_openers); + /* ... */ + } while (0); + + if (res >= 0) + return; + dev_err(&vdev->dev, "%s error: %d\n", __func__, res); +} + +/* global module data */ +/* find a device based on it's device-number (e.g. 
'3' for /dev/video3) */ +struct v4l2loopback_lookup_cb_data { + int device_nr; + struct v4l2_loopback_device *device; +}; +static int v4l2loopback_lookup_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *device = ptr; + struct v4l2loopback_lookup_cb_data *cbdata = data; + if (cbdata && device && device->vdev) { + if (device->vdev->num == cbdata->device_nr) { + cbdata->device = device; + cbdata->device_nr = id; + return 1; + } + } + return 0; +} +static int v4l2loopback_lookup(int device_nr, + struct v4l2_loopback_device **device) +{ + struct v4l2loopback_lookup_cb_data data = { + .device_nr = device_nr, + .device = NULL, + }; + int err = idr_for_each(&v4l2loopback_index_idr, &v4l2loopback_lookup_cb, + &data); + if (1 == err) { + if (device) + *device = data.device; + return data.device_nr; + } + return -ENODEV; +} +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd) +{ + struct video_device *loopdev = to_video_device(cd); + struct v4l2loopback_private *ptr = + (struct v4l2loopback_private *)video_get_drvdata(loopdev); + int nr = ptr->device_nr; + + return idr_find(&v4l2loopback_index_idr, nr); +} + +static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f) +{ + struct video_device *loopdev = video_devdata(f); + struct v4l2loopback_private *ptr = + (struct v4l2loopback_private *)video_get_drvdata(loopdev); + int nr = ptr->device_nr; + + return idr_find(&v4l2loopback_index_idr, nr); +} + +/* forward declarations */ +static void init_buffers(struct v4l2_loopback_device *dev); +static int allocate_buffers(struct v4l2_loopback_device *dev); +static int free_buffers(struct v4l2_loopback_device *dev); +static void try_free_buffers(struct v4l2_loopback_device *dev); +static int allocate_timeout_image(struct v4l2_loopback_device *dev); +static void check_timers(struct v4l2_loopback_device *dev); +static const struct v4l2_file_operations v4l2_loopback_fops; +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops; + +/* Queue helpers */ +/* next functions sets buffer flags and adjusts counters accordingly */ +static inline void set_done(struct v4l2l_buffer *buffer) +{ + buffer->buffer.flags &= ~V4L2_BUF_FLAG_QUEUED; + buffer->buffer.flags |= V4L2_BUF_FLAG_DONE; +} + +static inline void set_queued(struct v4l2l_buffer *buffer) +{ + buffer->buffer.flags &= ~V4L2_BUF_FLAG_DONE; + buffer->buffer.flags |= V4L2_BUF_FLAG_QUEUED; +} + +static inline void unset_flags(struct v4l2l_buffer *buffer) +{ + buffer->buffer.flags &= ~V4L2_BUF_FLAG_QUEUED; + buffer->buffer.flags &= ~V4L2_BUF_FLAG_DONE; +} + +/* V4L2 ioctl caps and params calls */ +/* returns device capabilities + * called on VIDIOC_QUERYCAP + */ +static int vidioc_querycap(struct file *file, void *priv, + struct v4l2_capability *cap) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + int labellen = (sizeof(cap->card) < sizeof(dev->card_label)) ? 
+ sizeof(cap->card) : + sizeof(dev->card_label); + int device_nr = + ((struct v4l2loopback_private *)video_get_drvdata(dev->vdev)) + ->device_nr; + __u32 capabilities = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; + + strlcpy(cap->driver, "v4l2 loopback", sizeof(cap->driver)); + snprintf(cap->card, labellen, dev->card_label); + snprintf(cap->bus_info, sizeof(cap->bus_info), + "platform:v4l2loopback-%03d", device_nr); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) + /* since 3.1.0, the v4l2-core system is supposed to set the version */ + cap->version = V4L2LOOPBACK_VERSION_CODE; +#endif + +#ifdef V4L2_CAP_VIDEO_M2M + capabilities |= V4L2_CAP_VIDEO_M2M; +#endif /* V4L2_CAP_VIDEO_M2M */ + + if (dev->announce_all_caps) { + capabilities |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT; + } else { + if (dev->ready_for_capture) { + capabilities |= V4L2_CAP_VIDEO_CAPTURE; + } + if (dev->ready_for_output) { + capabilities |= V4L2_CAP_VIDEO_OUTPUT; + } + } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + dev->vdev->device_caps = +#endif /* >=linux-4.7.0 */ + cap->device_caps = cap->capabilities = capabilities; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0) + cap->capabilities |= V4L2_CAP_DEVICE_CAPS; +#endif + + memset(cap->reserved, 0, sizeof(cap->reserved)); + return 0; +} + +static int vidioc_enum_framesizes(struct file *file, void *fh, + struct v4l2_frmsizeenum *argp) +{ + struct v4l2_loopback_device *dev; + + /* LATER: what does the index really mean? + * if it's about enumerating formats, we can safely ignore it + * (CHECK) + */ + + /* there can be only one... */ + if (argp->index) + return -EINVAL; + + dev = v4l2loopback_getdevice(file); + if (dev->ready_for_capture) { + /* format has already been negotiated + * cannot change during runtime + */ + argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; + + argp->discrete.width = dev->pix_format.width; + argp->discrete.height = dev->pix_format.height; + } else { + /* if the format has not been negotiated yet, we accept anything + */ + argp->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; + + argp->stepwise.min_width = V4L2LOOPBACK_SIZE_MIN_WIDTH; + argp->stepwise.min_height = V4L2LOOPBACK_SIZE_MIN_HEIGHT; + + argp->stepwise.max_width = dev->max_width; + argp->stepwise.max_height = dev->max_height; + + argp->stepwise.step_width = 1; + argp->stepwise.step_height = 1; + } + return 0; +} + +/* returns frameinterval (fps) for the set resolution + * called on VIDIOC_ENUM_FRAMEINTERVALS + */ +static int vidioc_enum_frameintervals(struct file *file, void *fh, + struct v4l2_frmivalenum *argp) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + if (dev->ready_for_capture) { + if (opener->vidioc_enum_frameintervals_calls > 0) + return -EINVAL; + if (argp->width == dev->pix_format.width && + argp->height == dev->pix_format.height) { + argp->type = V4L2_FRMIVAL_TYPE_DISCRETE; + argp->discrete = dev->capture_param.timeperframe; + opener->vidioc_enum_frameintervals_calls++; + return 0; + } + return -EINVAL; + } + return 0; +} + +/* ------------------ CAPTURE ----------------------- */ + +/* returns device formats + * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_enum_fmt_cap(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + + if (f->index) + return -EINVAL; + if (dev->ready_for_capture) { + const __u32 format = dev->pix_format.pixelformat; 
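+		/*
+		 * Editor's note: once a writer has negotiated the format,
+		 * readers enumerating V4L2_BUF_TYPE_VIDEO_CAPTURE formats
+		 * are offered exactly one entry, described by its FOURCC
+		 * below (a negotiated YUYV stream shows up as "[YUYV]").
+		 */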
+ + snprintf(f->description, sizeof(f->description), "[%c%c%c%c]", + (format >> 0) & 0xFF, (format >> 8) & 0xFF, + (format >> 16) & 0xFF, (format >> 24) & 0xFF); + + f->pixelformat = dev->pix_format.pixelformat; + } else { + return -EINVAL; + } + f->flags = 0; + MARK(); + return 0; +} + +/* returns current video format + * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_g_fmt_cap(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + + if (!dev->ready_for_capture) + return -EINVAL; + + fmt->fmt.pix = dev->pix_format; + MARK(); + return 0; +} + +/* checks if it is OK to change to format fmt; + * actual check is done by inner_try_fmt_cap + * just checking that pixelformat is OK and set other parameters, app should + * obey this decision + * called on VIDIOC_TRY_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_try_fmt_cap(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + char buf[5]; + + dev = v4l2loopback_getdevice(file); + + if (0 == dev->ready_for_capture) { + dprintk("setting fmt_cap not possible yet\n"); + return -EBUSY; + } + + if (fmt->fmt.pix.pixelformat != dev->pix_format.pixelformat) + return -EINVAL; + + fmt->fmt.pix = dev->pix_format; + + buf[4] = 0; + dprintk("capFOURCC=%s\n", fourcc2str(dev->pix_format.pixelformat, buf)); + return 0; +} + +/* sets new output format, if possible + * actually format is set by input and we even do not check it, just return + * current one, but it is possible to set subregions of input TODO(vasaka) + * called on VIDIOC_S_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_s_fmt_cap(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return vidioc_try_fmt_cap(file, priv, fmt); +} + +/* ------------------ OUTPUT ----------------------- */ + +/* returns device formats; + * LATER: allow all formats + * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_enum_fmt_out(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + struct v4l2_loopback_device *dev; + const struct v4l2l_format *fmt; + + dev = v4l2loopback_getdevice(file); + + if (dev->ready_for_capture) { + const __u32 format = dev->pix_format.pixelformat; + + /* format has been fixed by the writer, so only one single format is supported */ + if (f->index) + return -EINVAL; + + fmt = format_by_fourcc(format); + if (NULL == fmt) + return -EINVAL; + + f->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + /* f->flags = ??; */ + snprintf(f->description, sizeof(f->description), "%s", + fmt->name); + + f->pixelformat = dev->pix_format.pixelformat; + } else { + /* fill in a dummy format */ + /* coverity[unsigned_compare] */ + if (f->index < 0 || f->index >= FORMATS) + return -EINVAL; + + fmt = &formats[f->index]; + + f->pixelformat = fmt->fourcc; + snprintf(f->description, sizeof(f->description), "%s", + fmt->name); + } + f->flags = 0; + + return 0; +} + +/* returns current video format format fmt */ +/* NOTE: this is called from the producer + * so if format has not been negotiated yet, + * it should return ALL of available formats, + * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_g_fmt_out(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + + 
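+	/*
+	 * Editor's sketch (hypothetical userspace producer, not part of
+	 * this driver): a writer would typically fetch the current output
+	 * format here and adjust it before calling VIDIOC_S_FMT, e.g.:
+	 *
+	 *	struct v4l2_format f = { .type = V4L2_BUF_TYPE_VIDEO_OUTPUT };
+	 *	ioctl(fd, VIDIOC_G_FMT, &f);
+	 *	f.fmt.pix.width = 640;
+	 *	f.fmt.pix.height = 480;
+	 *	f.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
+	 *	ioctl(fd, VIDIOC_S_FMT, &f);
+	 */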
/* + * LATER: this should return the currently valid format + * gstreamer doesn't like it, if this returns -EINVAL, as it + * then concludes that there is _no_ valid format + * CHECK whether this assumption is wrong, + * or whether we have to always provide a valid format + */ + + fmt->fmt.pix = dev->pix_format; + return 0; +} + +/* checks if it is OK to change to format fmt; + * if format is negotiated do not change it + * called on VIDIOC_TRY_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_try_fmt_out(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + + /* TODO(vasaka) loopback does not care about formats writer want to set, + * maybe it is a good idea to restrict format somehow */ + if (dev->ready_for_capture) { + fmt->fmt.pix = dev->pix_format; + } else { + __u32 w = fmt->fmt.pix.width; + __u32 h = fmt->fmt.pix.height; + __u32 pixfmt = fmt->fmt.pix.pixelformat; + const struct v4l2l_format *format = format_by_fourcc(pixfmt); + + if (w > dev->max_width) + w = dev->max_width; + if (h > dev->max_height) + h = dev->max_height; + + dprintk("trying image %dx%d\n", w, h); + + if (w < 1) + w = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; + + if (h < 1) + h = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; + + if (NULL == format) + format = &formats[0]; + + pix_format_set_size(&fmt->fmt.pix, format, w, h); + + fmt->fmt.pix.pixelformat = format->fourcc; + + if ((fmt->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) || + (fmt->fmt.pix.colorspace > V4L2_COLORSPACE_DCI_P3)) + fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; + + if (V4L2_FIELD_ANY == fmt->fmt.pix.field) + fmt->fmt.pix.field = V4L2_FIELD_NONE; + + /* FIXXME: try_fmt should never modify the device-state */ + dev->pix_format = fmt->fmt.pix; + } + return 0; +} + +/* sets new output format, if possible; + * allocate data here because we do not know if it will be streaming or + * read/write IO + * called on VIDIOC_S_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_s_fmt_out(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + char buf[5]; + int ret; + MARK(); + + dev = v4l2loopback_getdevice(file); + ret = vidioc_try_fmt_out(file, priv, fmt); + + dprintk("s_fmt_out(%d) %d...%d\n", ret, dev->ready_for_capture, + dev->pix_format.sizeimage); + + buf[4] = 0; + dprintk("outFOURCC=%s\n", fourcc2str(dev->pix_format.pixelformat, buf)); + + if (ret < 0) + return ret; + + if (!dev->ready_for_capture) { + dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage); + fmt->fmt.pix.sizeimage = dev->buffer_size; + allocate_buffers(dev); + } + return ret; +} + +// #define V4L2L_OVERLAY +#ifdef V4L2L_OVERLAY +/* ------------------ OVERLAY ----------------------- */ +/* currently unsupported */ +/* GSTreamer's v4l2sink is buggy, as it requires the overlay to work + * while it should only require it, if overlay is requested + * once the gstreamer element is fixed, remove the overlay dummies + */ +#warning OVERLAY dummies +static int vidioc_g_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} + +static int vidioc_s_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} +#endif /* V4L2L_OVERLAY */ + +/* ------------------ PARAMs ----------------------- */ + +/* get some data flow parameters, only capability, fps and readbuffers has + * effect on this driver + * called on VIDIOC_G_PARM + */ +static int 
vidioc_g_parm(struct file *file, void *priv,
+			 struct v4l2_streamparm *parm)
+{
+	/* do not care about the type of opener; hope these enums will
+	 * always be compatible */
+	struct v4l2_loopback_device *dev;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+	parm->parm.capture = dev->capture_param;
+	return 0;
+}
+
+/* set some data flow parameters; only capability, fps and readbuffers have
+ * an effect on this driver
+ * called on VIDIOC_S_PARM
+ */
+static int vidioc_s_parm(struct file *file, void *priv,
+			 struct v4l2_streamparm *parm)
+{
+	struct v4l2_loopback_device *dev;
+	int err = 0;
+	MARK();
+
+	dev = v4l2loopback_getdevice(file);
+	dprintk("vidioc_s_parm called frate=%d/%d\n",
+		parm->parm.capture.timeperframe.numerator,
+		parm->parm.capture.timeperframe.denominator);
+
+	switch (parm->type) {
+	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
+		if ((err = set_timeperframe(
+			     dev, &parm->parm.capture.timeperframe)) < 0)
+			return err;
+		break;
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
+		if ((err = set_timeperframe(
+			     dev, &parm->parm.capture.timeperframe)) < 0)
+			return err;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	parm->parm.capture = dev->capture_param;
+	return 0;
+}
+
+#ifdef V4L2LOOPBACK_WITH_STD
+/* sets a tv standard; we do not actually need to handle this in any special
+ * way, it was added to support effecttv
+ * called on VIDIOC_S_STD
+ */
+static int vidioc_s_std(struct file *file, void *fh, v4l2_std_id *_std)
+{
+	v4l2_std_id req_std = 0, supported_std = 0;
+	const v4l2_std_id all_std = V4L2_STD_ALL, no_std = 0;
+
+	if (_std) {
+		req_std = *_std;
+		*_std = all_std;
+	}
+
+	/* we support everything in V4L2_STD_ALL, but not more... */
+	supported_std = (all_std & req_std);
+	if (no_std == supported_std)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* gets a fake video standard
+ * called on VIDIOC_G_STD
+ */
+static int vidioc_g_std(struct file *file, void *fh, v4l2_std_id *norm)
+{
+	if (norm)
+		*norm = V4L2_STD_ALL;
+	return 0;
+}
+/* gets a fake video standard
+ * called on VIDIOC_QUERYSTD
+ */
+static int vidioc_querystd(struct file *file, void *fh, v4l2_std_id *norm)
+{
+	if (norm)
+		*norm = V4L2_STD_ALL;
+	return 0;
+}
+#endif /* V4L2LOOPBACK_WITH_STD */
+
+/* get ctrls info
+ * called on VIDIOC_QUERYCTRL
+ */
+static int vidioc_queryctrl(struct file *file, void *fh,
+			    struct v4l2_queryctrl *q)
+{
+	const struct v4l2_ctrl_config *cnf = NULL;
+	switch (q->id) {
+	case CID_KEEP_FORMAT:
+		cnf = &v4l2loopback_ctrl_keepformat;
+		break;
+	case CID_SUSTAIN_FRAMERATE:
+		cnf = &v4l2loopback_ctrl_sustainframerate;
+		break;
+	case CID_TIMEOUT:
+		cnf = &v4l2loopback_ctrl_timeout;
+		break;
+	case CID_TIMEOUT_IMAGE_IO:
+		cnf = &v4l2loopback_ctrl_timeoutimageio;
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (!cnf)
+		BUG();
+
+	strcpy(q->name, cnf->name);
+	q->default_value = cnf->def;
+	q->type = cnf->type;
+	q->minimum = cnf->min;
+	q->maximum = cnf->max;
+	q->step = cnf->step;
+
+	memset(q->reserved, 0, sizeof(q->reserved));
+	return 0;
+}
+
+static int vidioc_g_ctrl(struct file *file, void *fh, struct v4l2_control *c)
+{
+	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
+
+	switch (c->id) {
+	case CID_KEEP_FORMAT:
+		c->value = dev->keep_format;
+		break;
+	case CID_SUSTAIN_FRAMERATE:
+		c->value = dev->sustain_framerate;
+		break;
+	case CID_TIMEOUT:
+		c->value = jiffies_to_msecs(dev->timeout_jiffies);
+		break;
+	case CID_TIMEOUT_IMAGE_IO:
+		c->value = dev->timeout_image_io;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int v4l2loopback_set_ctrl(struct v4l2_loopback_device *dev, u32
id, + s64 val) +{ + switch (id) { + case CID_KEEP_FORMAT: + if (val < 0 || val > 1) + return -EINVAL; + dev->keep_format = val; + try_free_buffers( + dev); /* will only free buffers if !keep_format */ + break; + case CID_SUSTAIN_FRAMERATE: + if (val < 0 || val > 1) + return -EINVAL; + spin_lock_bh(&dev->lock); + dev->sustain_framerate = val; + check_timers(dev); + spin_unlock_bh(&dev->lock); + break; + case CID_TIMEOUT: + if (val < 0 || val > MAX_TIMEOUT) + return -EINVAL; + spin_lock_bh(&dev->lock); + dev->timeout_jiffies = msecs_to_jiffies(val); + check_timers(dev); + spin_unlock_bh(&dev->lock); + allocate_timeout_image(dev); + break; + case CID_TIMEOUT_IMAGE_IO: + if (val < 0 || val > 1) + return -EINVAL; + dev->timeout_image_io = val; + break; + default: + return -EINVAL; + } + return 0; +} + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct v4l2_loopback_device *dev = container_of( + ctrl->handler, struct v4l2_loopback_device, ctrl_handler); + return v4l2loopback_set_ctrl(dev, ctrl->id, ctrl->val); +} +static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *c) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + return v4l2loopback_set_ctrl(dev, c->id, c->value); +} + +/* returns set of device outputs, in our case there is only one + * called on VIDIOC_ENUMOUTPUT + */ +static int vidioc_enum_output(struct file *file, void *fh, + struct v4l2_output *outp) +{ + __u32 index = outp->index; + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + MARK(); + + if (!dev->announce_all_caps && !dev->ready_for_output) + return -ENOTTY; + + if (0 != index) + return -EINVAL; + + /* clear all data (including the reserved fields) */ + memset(outp, 0, sizeof(*outp)); + + outp->index = index; + strlcpy(outp->name, "loopback in", sizeof(outp->name)); + outp->type = V4L2_OUTPUT_TYPE_ANALOG; + outp->audioset = 0; + outp->modulator = 0; +#ifdef V4L2LOOPBACK_WITH_STD + outp->std = V4L2_STD_ALL; +#ifdef V4L2_OUT_CAP_STD + outp->capabilities |= V4L2_OUT_CAP_STD; +#endif /* V4L2_OUT_CAP_STD */ +#endif /* V4L2LOOPBACK_WITH_STD */ + + return 0; +} + +/* which output is currently active, + * called on VIDIOC_G_OUTPUT + */ +static int vidioc_g_output(struct file *file, void *fh, unsigned int *i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_output) + return -ENOTTY; + if (i) + *i = 0; + return 0; +} + +/* set output, can make sense if we have more than one video src, + * called on VIDIOC_S_OUTPUT + */ +static int vidioc_s_output(struct file *file, void *fh, unsigned int i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_output) + return -ENOTTY; + + if (i) + return -EINVAL; + + return 0; +} + +/* returns set of device inputs, in our case there is only one, + * but later I may add more + * called on VIDIOC_ENUMINPUT + */ +static int vidioc_enum_input(struct file *file, void *fh, + struct v4l2_input *inp) +{ + __u32 index = inp->index; + MARK(); + + if (0 != index) + return -EINVAL; + + /* clear all data (including the reserved fields) */ + memset(inp, 0, sizeof(*inp)); + + inp->index = index; + strlcpy(inp->name, "loopback", sizeof(inp->name)); + inp->type = V4L2_INPUT_TYPE_CAMERA; + inp->audioset = 0; + inp->tuner = 0; + inp->status = 0; + +#ifdef V4L2LOOPBACK_WITH_STD + inp->std = V4L2_STD_ALL; +#ifdef V4L2_IN_CAP_STD + inp->capabilities |= V4L2_IN_CAP_STD; +#endif +#endif /* V4L2LOOPBACK_WITH_STD */ + + 
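+	/*
+	 * Editor's sketch (hypothetical userspace reader, not part of
+	 * this driver): the single input announced above is enumerated
+	 * like on any capture device, e.g.:
+	 *
+	 *	struct v4l2_input input = { .index = 0 };
+	 *	if (ioctl(fd, VIDIOC_ENUMINPUT, &input) == 0)
+	 *		printf("%s\n", input.name);	// prints "loopback"
+	 */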
return 0; +} + +/* which input is currently active, + * called on VIDIOC_G_INPUT + */ +static int vidioc_g_input(struct file *file, void *fh, unsigned int *i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_capture) + return -ENOTTY; + if (i) + *i = 0; + return 0; +} + +/* set input, can make sense if we have more than one video src, + * called on VIDIOC_S_INPUT + */ +static int vidioc_s_input(struct file *file, void *fh, unsigned int i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_capture) + return -ENOTTY; + if (i == 0) + return 0; + return -EINVAL; +} + +/* --------------- V4L2 ioctl buffer related calls ----------------- */ + +/* negotiate buffer type + * only mmap streaming supported + * called on VIDIOC_REQBUFS + */ +static int vidioc_reqbufs(struct file *file, void *fh, + struct v4l2_requestbuffers *b) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + int i; + MARK(); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + dprintk("reqbufs: %d\t%d=%d\n", b->memory, b->count, + dev->buffers_number); + if (opener->timeout_image_io) { + if (b->memory != V4L2_MEMORY_MMAP) + return -EINVAL; + b->count = 1; + return 0; + } + + init_buffers(dev); + switch (b->memory) { + case V4L2_MEMORY_MMAP: + /* do nothing here, buffers are always allocated */ + if (b->count < 1 || dev->buffers_number < 1) + return 0; + + if (b->count > dev->buffers_number) + b->count = dev->buffers_number; + + /* make sure that outbufs_list contains buffers from 0 to used_buffers-1 + * actually, it will have been already populated via v4l2_loopback_init() + * at this point */ + if (list_empty(&dev->outbufs_list)) { + for (i = 0; i < dev->used_buffers; ++i) + list_add_tail(&dev->buffers[i].list_head, + &dev->outbufs_list); + } + + /* also, if dev->used_buffers is going to be decreased, we should remove + * out-of-range buffers from outbufs_list, and fix bufpos2index mapping */ + if (b->count < dev->used_buffers) { + struct v4l2l_buffer *pos, *n; + + list_for_each_entry_safe (pos, n, &dev->outbufs_list, + list_head) { + if (pos->buffer.index >= b->count) + list_del(&pos->list_head); + } + + /* after we update dev->used_buffers, buffers in outbufs_list will + * correspond to dev->write_position + [0;b->count-1] range */ + i = dev->write_position; + list_for_each_entry (pos, &dev->outbufs_list, + list_head) { + dev->bufpos2index[i % b->count] = + pos->buffer.index; + ++i; + } + } + + opener->buffers_number = b->count; + if (opener->buffers_number < dev->used_buffers) + dev->used_buffers = opener->buffers_number; + return 0; + default: + return -EINVAL; + } +} + +/* returns buffer asked for; + * give app as many buffers as it wants, if it less than MAX, + * but map them in our inner buffers + * called on VIDIOC_QUERYBUF + */ +static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *b) +{ + enum v4l2_buf_type type; + int index; + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + + MARK(); + + type = b->type; + index = b->index; + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + if ((b->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) && + (b->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)) { + return -EINVAL; + } + if (b->index > max_buffers) + return -EINVAL; + + if (opener->timeout_image_io) + *b = dev->timeout_image_buffer.buffer; + else + *b = dev->buffers[b->index % 
dev->used_buffers].buffer; + + b->type = type; + b->index = index; + dprintkrw("buffer type: %d (of %d with size=%ld)\n", b->memory, + dev->buffers_number, dev->buffer_size); + + /* Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture' + https://github.com/umlaeute/v4l2loopback/issues/60 */ + b->flags &= ~V4L2_BUF_FLAG_DONE; + b->flags |= V4L2_BUF_FLAG_QUEUED; + + return 0; +} + +static void buffer_written(struct v4l2_loopback_device *dev, + struct v4l2l_buffer *buf) +{ + del_timer_sync(&dev->sustain_timer); + del_timer_sync(&dev->timeout_timer); + spin_lock_bh(&dev->lock); + + dev->bufpos2index[dev->write_position % dev->used_buffers] = + buf->buffer.index; + list_move_tail(&buf->list_head, &dev->outbufs_list); + ++dev->write_position; + dev->reread_count = 0; + + check_timers(dev); + spin_unlock_bh(&dev->lock); +} + +/* put buffer to queue + * called on VIDIOC_QBUF + */ +static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + struct v4l2l_buffer *b; + int index; + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + if (buf->index > max_buffers) + return -EINVAL; + if (opener->timeout_image_io) + return 0; + + index = buf->index % dev->used_buffers; + b = &dev->buffers[index]; + + switch (buf->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + dprintkrw("capture QBUF index: %d\n", index); + set_queued(b); + return 0; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + dprintkrw("output QBUF pos: %d index: %d\n", + dev->write_position, index); + if (buf->timestamp.tv_sec == 0 && buf->timestamp.tv_usec == 0) + v4l2l_get_timestamp(&b->buffer); + else + b->buffer.timestamp = buf->timestamp; + b->buffer.bytesused = buf->bytesused; + set_done(b); + buffer_written(dev, b); + + /* Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture' + https://github.com/umlaeute/v4l2loopback/issues/60 */ + buf->flags &= ~V4L2_BUF_FLAG_DONE; + buf->flags |= V4L2_BUF_FLAG_QUEUED; + + wake_up_all(&dev->read_event); + return 0; + default: + return -EINVAL; + } +} + +static int can_read(struct v4l2_loopback_device *dev, + struct v4l2_loopback_opener *opener) +{ + int ret; + + spin_lock_bh(&dev->lock); + check_timers(dev); + ret = dev->write_position > opener->read_position || + dev->reread_count > opener->reread_count || dev->timeout_happened; + spin_unlock_bh(&dev->lock); + return ret; +} + +static int get_capture_buffer(struct file *file) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); + int pos, ret; + int timeout_happened; + + if ((file->f_flags & O_NONBLOCK) && + (dev->write_position <= opener->read_position && + dev->reread_count <= opener->reread_count && + !dev->timeout_happened)) + return -EAGAIN; + wait_event_interruptible(dev->read_event, can_read(dev, opener)); + + spin_lock_bh(&dev->lock); + if (dev->write_position == opener->read_position) { + if (dev->reread_count > opener->reread_count + 2) + opener->reread_count = dev->reread_count - 1; + ++opener->reread_count; + pos = (opener->read_position + dev->used_buffers - 1) % + dev->used_buffers; + } else { + opener->reread_count = 0; + if (dev->write_position > + opener->read_position + dev->used_buffers) + opener->read_position = dev->write_position - 1; + pos = opener->read_position % dev->used_buffers; + ++opener->read_position; + } + timeout_happened = dev->timeout_happened; + dev->timeout_happened = 0; + 
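+	/*
+	 * The timeout flag is latched into a local variable and cleared
+	 * before the spinlock is dropped, so two concurrent readers
+	 * cannot both consume the same timeout event.
+	 */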
spin_unlock_bh(&dev->lock); + + ret = dev->bufpos2index[pos]; + if (timeout_happened) { + /* although allocated on-demand, timeout_image is freed only + * in free_buffers(), so we don't need to worry about it being + * deallocated suddenly */ + memcpy(dev->image + dev->buffers[ret].buffer.m.offset, + dev->timeout_image, dev->buffer_size); + } + return ret; +} + +/* put buffer to dequeue + * called on VIDIOC_DQBUF + */ +static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + int index; + struct v4l2l_buffer *b; + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + if (opener->timeout_image_io) { + *buf = dev->timeout_image_buffer.buffer; + return 0; + } + + switch (buf->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + index = get_capture_buffer(file); + if (index < 0) + return index; + dprintkrw("capture DQBUF pos: %d index: %d\n", + opener->read_position - 1, index); + if (!(dev->buffers[index].buffer.flags & + V4L2_BUF_FLAG_MAPPED)) { + dprintk("trying to return not mapped buf[%d]\n", index); + return -EINVAL; + } + unset_flags(&dev->buffers[index]); + *buf = dev->buffers[index].buffer; + return 0; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + b = list_entry(dev->outbufs_list.prev, struct v4l2l_buffer, + list_head); + list_move_tail(&b->list_head, &dev->outbufs_list); + dprintkrw("output DQBUF index: %d\n", b->buffer.index); + unset_flags(b); + *buf = b->buffer; + buf->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + return 0; + default: + return -EINVAL; + } +} + +/* ------------- STREAMING ------------------- */ + +/* start streaming + * called on VIDIOC_STREAMON + */ +static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + MARK(); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + opener->type = WRITER; + dev->ready_for_output = 0; + if (!dev->ready_for_capture) { + int ret = allocate_buffers(dev); + if (ret < 0) + return ret; + } + dev->ready_for_capture++; + return 0; + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + opener->type = READER; + if (!dev->ready_for_capture) + return -EIO; + return 0; + default: + return -EINVAL; + } + return -EINVAL; +} + +/* stop streaming + * called on VIDIOC_STREAMOFF + */ +static int vidioc_streamoff(struct file *file, void *fh, + enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev; + MARK(); + dprintk("%d\n", type); + + dev = v4l2loopback_getdevice(file); + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (dev->ready_for_capture > 0) + dev->ready_for_capture--; + return 0; + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + return 0; + default: + return -EINVAL; + } + return -EINVAL; +} + +#ifdef CONFIG_VIDEO_V4L1_COMPAT +static int vidiocgmbuf(struct file *file, void *fh, struct video_mbuf *p) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + p->frames = dev->buffers_number; + p->offsets[0] = 0; + p->offsets[1] = 0; + p->size = dev->buffer_size; + return 0; +} +#endif + +static int vidioc_subscribe_event(struct v4l2_fh *fh, + const struct v4l2_event_subscription *sub) +{ + switch (sub->type) { + case V4L2_EVENT_CTRL: + return v4l2_ctrl_subscribe_event(fh, sub); + } + + return -EINVAL; +} + +/* file operations */ +static void vm_open(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + 
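+	/*
+	 * Each mmap()ed mapping takes a reference on its backing buffer;
+	 * vm_open() and vm_close() keep use_count balanced so the driver
+	 * can tell whether a buffer is still mapped by userspace.
+	 */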
buf->use_count++; +} + +static void vm_close(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + buf->use_count--; +} + +static struct vm_operations_struct vm_ops = { + .open = vm_open, + .close = vm_close, +}; + +static int v4l2_loopback_mmap(struct file *file, struct vm_area_struct *vma) +{ + unsigned long addr; + unsigned long start; + unsigned long size; + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + struct v4l2l_buffer *buffer = NULL; + MARK(); + + start = (unsigned long)vma->vm_start; + size = (unsigned long)(vma->vm_end - vma->vm_start); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(file->private_data); + + if (size > dev->buffer_size) { + dprintk("userspace tries to mmap too much, fail\n"); + return -EINVAL; + } + if (opener->timeout_image_io) { + /* we are going to map the timeout_image_buffer */ + if ((vma->vm_pgoff << PAGE_SHIFT) != + dev->buffer_size * MAX_BUFFERS) { + dprintk("invalid mmap offset for timeout_image_io mode\n"); + return -EINVAL; + } + } else if ((vma->vm_pgoff << PAGE_SHIFT) > + dev->buffer_size * (dev->buffers_number - 1)) { + dprintk("userspace tries to mmap too far, fail\n"); + return -EINVAL; + } + + /* FIXXXXXME: allocation should not happen here! */ + if (NULL == dev->image) + if (allocate_buffers(dev) < 0) + return -EINVAL; + + if (opener->timeout_image_io) { + buffer = &dev->timeout_image_buffer; + addr = (unsigned long)dev->timeout_image; + } else { + int i; + for (i = 0; i < dev->buffers_number; ++i) { + buffer = &dev->buffers[i]; + if ((buffer->buffer.m.offset >> PAGE_SHIFT) == + vma->vm_pgoff) + break; + } + + if (NULL == buffer) + return -EINVAL; + + addr = (unsigned long)dev->image + + (vma->vm_pgoff << PAGE_SHIFT); + } + + while (size > 0) { + struct page *page; + + page = (void *)vmalloc_to_page((void *)addr); + + if (vm_insert_page(vma, start, page) < 0) + return -EAGAIN; + + start += PAGE_SIZE; + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + vma->vm_ops = &vm_ops; + vma->vm_private_data = buffer; + buffer->buffer.flags |= V4L2_BUF_FLAG_MAPPED; + + vm_open(vma); + + MARK(); + return 0; +} + +static unsigned int v4l2_loopback_poll(struct file *file, + struct poll_table_struct *pts) +{ + struct v4l2_loopback_opener *opener; + struct v4l2_loopback_device *dev; + __poll_t req_events = poll_requested_events(pts); + int ret_mask = 0; + MARK(); + + opener = fh_to_opener(file->private_data); + dev = v4l2loopback_getdevice(file); + + if (req_events & POLLPRI) { + if (!v4l2_event_pending(&opener->fh)) + poll_wait(file, &opener->fh.wait, pts); + if (v4l2_event_pending(&opener->fh)) { + ret_mask |= POLLPRI; + if (!(req_events & DEFAULT_POLLMASK)) + return ret_mask; + } + } + + switch (opener->type) { + case WRITER: + ret_mask |= POLLOUT | POLLWRNORM; + break; + case READER: + if (!can_read(dev, opener)) { + if (ret_mask) + return ret_mask; + poll_wait(file, &dev->read_event, pts); + } + if (can_read(dev, opener)) + ret_mask |= POLLIN | POLLRDNORM; + if (v4l2_event_pending(&opener->fh)) + ret_mask |= POLLPRI; + break; + default: + break; + } + + MARK(); + return ret_mask; +} + +/* do not want to limit device opens, it can be as many readers as user want, + * writers are limited by means of setting writer field */ +static int v4l2_loopback_open(struct file *file) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + MARK(); + dev = v4l2loopback_getdevice(file); + if (dev->open_count.counter >= dev->max_openers) + return -EBUSY; + /* 
kfree on close */ + opener = kzalloc(sizeof(*opener), GFP_KERNEL); + if (opener == NULL) + return -ENOMEM; + + v4l2_fh_init(&opener->fh, video_devdata(file)); + file->private_data = &opener->fh; + atomic_inc(&dev->open_count); + + opener->timeout_image_io = dev->timeout_image_io; + dev->timeout_image_io = 0; + + if (opener->timeout_image_io) { + int r = allocate_timeout_image(dev); + + if (r < 0) { + dprintk("timeout image allocation failed\n"); + return r; + } + } + + v4l2_fh_add(&opener->fh); + dprintk("opened dev:%p with image:%p\n", dev, dev ? dev->image : NULL); + MARK(); + return 0; +} + +static int v4l2_loopback_close(struct file *file) +{ + struct v4l2_loopback_opener *opener; + struct v4l2_loopback_device *dev; + int iswriter = 0; + MARK(); + + opener = fh_to_opener(file->private_data); + dev = v4l2loopback_getdevice(file); + + if (WRITER == opener->type) + iswriter = 1; + + atomic_dec(&dev->open_count); + if (dev->open_count.counter == 0) { + del_timer_sync(&dev->sustain_timer); + del_timer_sync(&dev->timeout_timer); + } + try_free_buffers(dev); + + v4l2_fh_del(&opener->fh); + v4l2_fh_exit(&opener->fh); + + kfree(opener); + if (iswriter) { + dev->ready_for_output = 1; + } + MARK(); + return 0; +} + +static ssize_t v4l2_loopback_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int read_index; + struct v4l2_loopback_device *dev; + struct v4l2_buffer *b; + MARK(); + + dev = v4l2loopback_getdevice(file); + + read_index = get_capture_buffer(file); + if (read_index < 0) + return read_index; + if (count > dev->buffer_size) + count = dev->buffer_size; + b = &dev->buffers[read_index].buffer; + if (count > b->bytesused) + count = b->bytesused; + if (copy_to_user((void *)buf, (void *)(dev->image + b->m.offset), + count)) { + printk(KERN_ERR + "v4l2-loopback: failed copy_to_user() in read buf\n"); + return -EFAULT; + } + dprintkrw("leave v4l2_loopback_read()\n"); + return count; +} + +static ssize_t v4l2_loopback_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct v4l2_loopback_device *dev; + int write_index; + struct v4l2_buffer *b; + MARK(); + + dev = v4l2loopback_getdevice(file); + + /* there's at least one writer, so don't stop announcing output capabilities */ + dev->ready_for_output = 0; + + if (!dev->ready_for_capture) { + int ret = allocate_buffers(dev); + if (ret < 0) + return ret; + dev->ready_for_capture = 1; + } + dprintkrw("v4l2_loopback_write() trying to write %zu bytes\n", count); + if (count > dev->buffer_size) + count = dev->buffer_size; + + write_index = dev->write_position % dev->used_buffers; + b = &dev->buffers[write_index].buffer; + + if (copy_from_user((void *)(dev->image + b->m.offset), (void *)buf, + count)) { + printk(KERN_ERR + "v4l2-loopback: failed copy_from_user() in write buf, could not write %zu\n", + count); + return -EFAULT; + } + v4l2l_get_timestamp(b); + b->bytesused = count; + b->sequence = dev->write_position; + buffer_written(dev, &dev->buffers[write_index]); + wake_up_all(&dev->read_event); + dprintkrw("leave v4l2_loopback_write()\n"); + return count; +} + +/* init functions */ +/* frees buffers, if already allocated */ +static int free_buffers(struct v4l2_loopback_device *dev) +{ + MARK(); + dprintk("freeing image@%p for dev:%p\n", dev ? 
dev->image : NULL, dev); + if (dev->image) { + vfree(dev->image); + dev->image = NULL; + } + if (dev->timeout_image) { + vfree(dev->timeout_image); + dev->timeout_image = NULL; + } + dev->imagesize = 0; + + return 0; +} +/* frees buffers, if they are no longer needed */ +static void try_free_buffers(struct v4l2_loopback_device *dev) +{ + MARK(); + if (0 == dev->open_count.counter && !dev->keep_format) { + free_buffers(dev); + dev->ready_for_capture = 0; + dev->buffer_size = 0; + dev->write_position = 0; + } +} +/* allocates buffers, if buffer_size is set */ +static int allocate_buffers(struct v4l2_loopback_device *dev) +{ + MARK(); + /* vfree on close file operation in case no open handles left */ + if (0 == dev->buffer_size) + return -EINVAL; + + if (dev->image) { + dprintk("allocating buffers again: %ld %ld\n", + dev->buffer_size * dev->buffers_number, dev->imagesize); + /* FIXME: prevent double allocation more intelligently! */ + if (dev->buffer_size * dev->buffers_number == dev->imagesize) + return 0; + + /* if there is only one writer, no problem should occur */ + if (dev->open_count.counter == 1) + free_buffers(dev); + else + return -EINVAL; + } + + dev->imagesize = dev->buffer_size * dev->buffers_number; + + dprintk("allocating %ld = %ldx%d\n", dev->imagesize, dev->buffer_size, + dev->buffers_number); + + dev->image = vmalloc(dev->imagesize); + if (dev->timeout_jiffies > 0) + allocate_timeout_image(dev); + + if (dev->image == NULL) + return -ENOMEM; + dprintk("vmallocated %ld bytes\n", dev->imagesize); + MARK(); + init_buffers(dev); + return 0; +} + +/* init inner buffers, they are capture mode and flags are set as + * for capture mod buffers */ +static void init_buffers(struct v4l2_loopback_device *dev) +{ + int i; + int buffer_size; + int bytesused; + MARK(); + + buffer_size = dev->buffer_size; + bytesused = dev->pix_format.sizeimage; + + for (i = 0; i < dev->buffers_number; ++i) { + struct v4l2_buffer *b = &dev->buffers[i].buffer; + b->index = i; + b->bytesused = bytesused; + b->length = buffer_size; + b->field = V4L2_FIELD_NONE; + b->flags = 0; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 6, 1) + b->input = 0; +#endif + b->m.offset = i * buffer_size; + b->memory = V4L2_MEMORY_MMAP; + b->sequence = 0; + b->timestamp.tv_sec = 0; + b->timestamp.tv_usec = 0; + b->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + v4l2l_get_timestamp(b); + } + dev->timeout_image_buffer = dev->buffers[0]; + dev->timeout_image_buffer.buffer.m.offset = MAX_BUFFERS * buffer_size; + MARK(); +} + +static int allocate_timeout_image(struct v4l2_loopback_device *dev) +{ + MARK(); + if (dev->buffer_size <= 0) + return -EINVAL; + + if (dev->timeout_image == NULL) { + dev->timeout_image = v4l2l_vzalloc(dev->buffer_size); + if (dev->timeout_image == NULL) + return -ENOMEM; + } + return 0; +} + +/* fills and register video device */ +static void init_vdev(struct video_device *vdev, int nr) +{ + MARK(); + +#ifdef V4L2LOOPBACK_WITH_STD + vdev->tvnorms = V4L2_STD_ALL; +#endif /* V4L2LOOPBACK_WITH_STD */ + + vdev->vfl_type = VFL_TYPE_VIDEO; + vdev->fops = &v4l2_loopback_fops; + vdev->ioctl_ops = &v4l2_loopback_ioctl_ops; + vdev->release = &video_device_release; + vdev->minor = -1; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT | + V4L2_CAP_READWRITE | V4L2_CAP_STREAMING; +#ifdef V4L2_CAP_VIDEO_M2M + vdev->device_caps |= V4L2_CAP_VIDEO_M2M; +#endif +#endif /* >=linux-4.7.0 */ + + if (debug > 1) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 20, 0) + vdev->debug 
= V4L2_DEBUG_IOCTL | V4L2_DEBUG_IOCTL_ARG; +#else + vdev->dev_debug = + V4L2_DEV_DEBUG_IOCTL | V4L2_DEV_DEBUG_IOCTL_ARG; +#endif + + /* since kernel-3.7, there is a new field 'vfl_dir' that has to be + * set to VFL_DIR_M2M for bidirectional devices */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) + vdev->vfl_dir = VFL_DIR_M2M; +#endif + + MARK(); +} + +/* init default capture parameters, only fps may be changed in future */ +static void init_capture_param(struct v4l2_captureparm *capture_param) +{ + MARK(); + capture_param->capability = 0; + capture_param->capturemode = 0; + capture_param->extendedmode = 0; + capture_param->readbuffers = max_buffers; + capture_param->timeperframe.numerator = 1; + capture_param->timeperframe.denominator = 30; +} + +static void check_timers(struct v4l2_loopback_device *dev) +{ + if (!dev->ready_for_capture) + return; + + if (dev->timeout_jiffies > 0 && !timer_pending(&dev->timeout_timer)) + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + if (dev->sustain_framerate && !timer_pending(&dev->sustain_timer)) + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies * 3 / 2); +} +#ifdef HAVE_TIMER_SETUP +static void sustain_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = from_timer(dev, t, sustain_timer); +#else +static void sustain_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->sustain_framerate) { + dev->reread_count++; + dprintkrw("reread: %d %d\n", dev->write_position, + dev->reread_count); + if (dev->reread_count == 1) + mod_timer(&dev->sustain_timer, + jiffies + max(1UL, dev->frame_jiffies / 2)); + else + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} +#ifdef HAVE_TIMER_SETUP +static void timeout_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = from_timer(dev, t, timeout_timer); +#else +static void timeout_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->timeout_jiffies > 0) { + dev->timeout_happened = 1; + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} + +/* init loopback main structure */ +#define DEFAULT_FROM_CONF(confmember, default_condition, default_value) \ + ((conf) ? \ + ((conf->confmember default_condition) ? (default_value) : \ + (conf->confmember)) : \ + default_value) + +static int v4l2_loopback_add(struct v4l2_loopback_config *conf, int *ret_nr) +{ + struct v4l2_loopback_device *dev; + struct v4l2_ctrl_handler *hdl; + + int err = -ENOMEM; + + int _max_width = DEFAULT_FROM_CONF( + max_width, <= V4L2LOOPBACK_SIZE_MIN_WIDTH, max_width); + int _max_height = DEFAULT_FROM_CONF( + max_height, <= V4L2LOOPBACK_SIZE_MIN_HEIGHT, max_height); + bool _announce_all_caps = (conf && conf->announce_all_caps >= 0) ? 
+ (conf->announce_all_caps) : + V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS; + + int _max_buffers = DEFAULT_FROM_CONF(max_buffers, <= 0, max_buffers); + int _max_openers = DEFAULT_FROM_CONF(max_openers, <= 0, max_openers); + + int nr = -1; + if (conf) { + if (conf->capture_nr >= 0 && + conf->output_nr == conf->capture_nr) { + nr = conf->capture_nr; + } else if (conf->capture_nr < 0 && conf->output_nr < 0) { + nr = -1; + } else if (conf->capture_nr < 0) { + nr = conf->output_nr; + } else if (conf->output_nr < 0) { + nr = conf->capture_nr; + } else { + printk(KERN_ERR + "split OUTPUT and CAPTURE devices not yet supported."); + printk(KERN_INFO + "both devices must have the same number (%d != %d).", + conf->output_nr, conf->capture_nr); + return -EINVAL; + } + } + + if (idr_find(&v4l2loopback_index_idr, nr)) + return -EEXIST; + + dprintk("creating v4l2loopback-device #%d\n", nr); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + /* allocate id, if @id >= 0, we're requesting that specific id */ + if (nr >= 0) { + err = idr_alloc(&v4l2loopback_index_idr, dev, nr, nr + 1, + GFP_KERNEL); + if (err == -ENOSPC) + err = -EEXIST; + } else { + err = idr_alloc(&v4l2loopback_index_idr, dev, 0, 0, GFP_KERNEL); + } + if (err < 0) + goto out_free_dev; + nr = err; + err = -ENOMEM; + + if (conf && conf->card_label && *(conf->card_label)) { + snprintf(dev->card_label, sizeof(dev->card_label), "%s", + conf->card_label); + } else { + snprintf(dev->card_label, sizeof(dev->card_label), + "Dummy video device (0x%04X)", nr); + } + snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), + "v4l2loopback-%03d", nr); + + err = v4l2_device_register(NULL, &dev->v4l2_dev); + if (err) + goto out_free_idr; + MARK(); + + dev->vdev = video_device_alloc(); + if (dev->vdev == NULL) { + err = -ENOMEM; + goto out_unregister; + } + video_set_drvdata(dev->vdev, + kzalloc(sizeof(struct v4l2loopback_private), + GFP_KERNEL)); + if (video_get_drvdata(dev->vdev) == NULL) { + err = -ENOMEM; + goto out_unregister; + } + + MARK(); + snprintf(dev->vdev->name, sizeof(dev->vdev->name), dev->card_label); + + ((struct v4l2loopback_private *)video_get_drvdata(dev->vdev)) + ->device_nr = nr; + + init_vdev(dev->vdev, nr); + dev->vdev->v4l2_dev = &dev->v4l2_dev; + init_capture_param(&dev->capture_param); + err = set_timeperframe(dev, &dev->capture_param.timeperframe); + if (err) + goto out_unregister; + dev->keep_format = 0; + dev->sustain_framerate = 0; + + dev->announce_all_caps = _announce_all_caps; + dev->max_width = _max_width; + dev->max_height = _max_height; + dev->max_openers = _max_openers; + dev->buffers_number = dev->used_buffers = _max_buffers; + + dev->write_position = 0; + + MARK(); + spin_lock_init(&dev->lock); + INIT_LIST_HEAD(&dev->outbufs_list); + if (list_empty(&dev->outbufs_list)) { + int i; + + for (i = 0; i < dev->used_buffers; ++i) + list_add_tail(&dev->buffers[i].list_head, + &dev->outbufs_list); + } + memset(dev->bufpos2index, 0, sizeof(dev->bufpos2index)); + atomic_set(&dev->open_count, 0); + dev->ready_for_capture = 0; + dev->ready_for_output = 1; + + dev->buffer_size = 0; + dev->image = NULL; + dev->imagesize = 0; +#ifdef HAVE_TIMER_SETUP + timer_setup(&dev->sustain_timer, sustain_timer_clb, 0); + timer_setup(&dev->timeout_timer, timeout_timer_clb, 0); +#else + setup_timer(&dev->sustain_timer, sustain_timer_clb, nr); + setup_timer(&dev->timeout_timer, timeout_timer_clb, nr); +#endif + dev->reread_count = 0; + dev->timeout_jiffies = 0; + dev->timeout_image = NULL; + dev->timeout_happened = 0; + + hdl = 
&dev->ctrl_handler; + err = v4l2_ctrl_handler_init(hdl, 4); + if (err) + goto out_unregister; + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_keepformat, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_sustainframerate, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeout, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeoutimageio, NULL); + if (hdl->error) { + err = hdl->error; + goto out_free_handler; + } + dev->v4l2_dev.ctrl_handler = hdl; + + err = v4l2_ctrl_handler_setup(hdl); + if (err) + goto out_free_handler; + + /* FIXME set buffers to 0 */ + + /* Set initial format */ + dev->pix_format.width = 0; /* V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; */ + dev->pix_format.height = 0; /* V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; */ + dev->pix_format.pixelformat = formats[0].fourcc; + dev->pix_format.colorspace = + V4L2_COLORSPACE_SRGB; /* do we need to set this ? */ + dev->pix_format.field = V4L2_FIELD_NONE; + + dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage); + dprintk("buffer_size = %ld (=%d)\n", dev->buffer_size, + dev->pix_format.sizeimage); + err = allocate_buffers(dev); + if (err && dev->buffer_size) + goto out_free_handler; + + init_waitqueue_head(&dev->read_event); + + /* register the device -> it creates /dev/video* */ + if (video_register_device(dev->vdev, VFL_TYPE_VIDEO, nr) < 0) { + printk(KERN_ERR + "v4l2loopback: failed video_register_device()\n"); + err = -EFAULT; + goto out_free_device; + } + v4l2loopback_create_sysfs(dev->vdev); + + MARK(); + if (ret_nr) + *ret_nr = dev->vdev->num; + return 0; + +out_free_device: + video_device_release(dev->vdev); +out_free_handler: + v4l2_ctrl_handler_free(&dev->ctrl_handler); +out_unregister: + v4l2_device_unregister(&dev->v4l2_dev); +out_free_idr: + idr_remove(&v4l2loopback_index_idr, nr); +out_free_dev: + kfree(dev); + return err; +} + +static void v4l2_loopback_remove(struct v4l2_loopback_device *dev) +{ + free_buffers(dev); + v4l2loopback_remove_sysfs(dev->vdev); + kfree(video_get_drvdata(dev->vdev)); + video_unregister_device(dev->vdev); + v4l2_device_unregister(&dev->v4l2_dev); + v4l2_ctrl_handler_free(&dev->ctrl_handler); + kfree(dev); +} + +static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, + unsigned long parm) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_config conf; + struct v4l2_loopback_config *confptr = &conf; + int device_nr; + int ret; + + ret = mutex_lock_killable(&v4l2loopback_ctl_mutex); + if (ret) + return ret; + + ret = -EINVAL; + switch (cmd) { + default: + ret = -ENOSYS; + break; + /* add a v4l2loopback device (pair), based on the user-provided specs */ + case V4L2LOOPBACK_CTL_ADD: + if (parm) { + if ((ret = copy_from_user(&conf, (void *)parm, + sizeof(conf))) < 0) + break; + } else + confptr = NULL; + ret = v4l2_loopback_add(confptr, &device_nr); + if (ret >= 0) + ret = device_nr; + break; + /* remove a v4l2loopback device (both capture and output) */ + case V4L2LOOPBACK_CTL_REMOVE: + ret = v4l2loopback_lookup((int)parm, &dev); + if (ret >= 0 && dev) { + int nr = ret; + ret = -EBUSY; + if (dev->open_count.counter > 0) + break; + idr_remove(&v4l2loopback_index_idr, nr); + v4l2_loopback_remove(dev); + ret = 0; + }; + break; + /* get information for a loopback device. + * this is mostly about limits (which cannot be queried directly with VIDIOC_G_FMT and friends + */ + case V4L2LOOPBACK_CTL_QUERY: + if (!parm) + break; + if ((ret = copy_from_user(&conf, (void *)parm, sizeof(conf))) < + 0) + break; + device_nr = + (conf.output_nr < 0) ? 
conf.capture_nr : conf.output_nr; + MARK(); + /* get the device from either capture_nr or output_nr (whatever is valid) */ + if ((ret = v4l2loopback_lookup(device_nr, &dev)) < 0) + break; + MARK(); + /* if we got the device from output_nr and there is a valid capture_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != conf.capture_nr) && (conf.capture_nr >= 0) && + (ret != v4l2loopback_lookup(conf.capture_nr, 0))) + break; + MARK(); + /* if otoh, we got the device from capture_nr and there is a valid output_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != conf.output_nr) && (conf.output_nr >= 0) && + (ret != v4l2loopback_lookup(conf.output_nr, 0))) + break; + MARK(); + + /* v4l2_loopback_config identified a single device, so fetch the data */ + snprintf(conf.card_label, sizeof(conf.card_label), "%s", + dev->card_label); + MARK(); + conf.output_nr = conf.capture_nr = dev->vdev->num; + conf.max_width = dev->max_width; + conf.max_height = dev->max_height; + conf.announce_all_caps = dev->announce_all_caps; + conf.max_buffers = dev->buffers_number; + conf.max_openers = dev->max_openers; + conf.debug = debug; + MARK(); + if (copy_to_user((void *)parm, &conf, sizeof(conf))) { + ret = -EFAULT; + break; + } + MARK(); + ret = 0; + ; + break; + } + + MARK(); + mutex_unlock(&v4l2loopback_ctl_mutex); + MARK(); + return ret; +} + +/* LINUX KERNEL */ + +static const struct file_operations v4l2loopback_ctl_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = v4l2loopback_control_ioctl, + .compat_ioctl = v4l2loopback_control_ioctl, + .llseek = noop_llseek, + // clang-format on +}; + +static struct miscdevice v4l2loopback_misc = { + // clang-format off + .minor = MISC_DYNAMIC_MINOR, + .name = "v4l2loopback", + .fops = &v4l2loopback_ctl_fops, + // clang-format on +}; + +static const struct v4l2_file_operations v4l2_loopback_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = v4l2_loopback_open, + .release = v4l2_loopback_close, + .read = v4l2_loopback_read, + .write = v4l2_loopback_write, + .poll = v4l2_loopback_poll, + .mmap = v4l2_loopback_mmap, + .unlocked_ioctl = video_ioctl2, + // clang-format on +}; + +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops = { + // clang-format off + .vidioc_querycap = &vidioc_querycap, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) + .vidioc_enum_framesizes = &vidioc_enum_framesizes, + .vidioc_enum_frameintervals = &vidioc_enum_frameintervals, +#endif + +#ifndef HAVE__V4L2_CTRLS + .vidioc_queryctrl = &vidioc_queryctrl, + .vidioc_g_ctrl = &vidioc_g_ctrl, + .vidioc_s_ctrl = &vidioc_s_ctrl, +#endif /* HAVE__V4L2_CTRLS */ + + .vidioc_enum_output = &vidioc_enum_output, + .vidioc_g_output = &vidioc_g_output, + .vidioc_s_output = &vidioc_s_output, + + .vidioc_enum_input = &vidioc_enum_input, + .vidioc_g_input = &vidioc_g_input, + .vidioc_s_input = &vidioc_s_input, + + .vidioc_enum_fmt_vid_cap = &vidioc_enum_fmt_cap, + .vidioc_g_fmt_vid_cap = &vidioc_g_fmt_cap, + .vidioc_s_fmt_vid_cap = &vidioc_s_fmt_cap, + .vidioc_try_fmt_vid_cap = &vidioc_try_fmt_cap, + + .vidioc_enum_fmt_vid_out = &vidioc_enum_fmt_out, + .vidioc_s_fmt_vid_out = &vidioc_s_fmt_out, + .vidioc_g_fmt_vid_out = &vidioc_g_fmt_out, + .vidioc_try_fmt_vid_out = &vidioc_try_fmt_out, + +#ifdef V4L2L_OVERLAY + .vidioc_s_fmt_vid_overlay = &vidioc_s_fmt_overlay, + .vidioc_g_fmt_vid_overlay = &vidioc_g_fmt_overlay, +#endif + +#ifdef V4L2LOOPBACK_WITH_STD + .vidioc_s_std 
= &vidioc_s_std, + .vidioc_g_std = &vidioc_g_std, + .vidioc_querystd = &vidioc_querystd, +#endif /* V4L2LOOPBACK_WITH_STD */ + + .vidioc_g_parm = &vidioc_g_parm, + .vidioc_s_parm = &vidioc_s_parm, + + .vidioc_reqbufs = &vidioc_reqbufs, + .vidioc_querybuf = &vidioc_querybuf, + .vidioc_qbuf = &vidioc_qbuf, + .vidioc_dqbuf = &vidioc_dqbuf, + + .vidioc_streamon = &vidioc_streamon, + .vidioc_streamoff = &vidioc_streamoff, + +#ifdef CONFIG_VIDEO_V4L1_COMPAT + .vidiocgmbuf = &vidiocgmbuf, +#endif + + .vidioc_subscribe_event = &vidioc_subscribe_event, + .vidioc_unsubscribe_event = &v4l2_event_unsubscribe, + // clang-format on +}; + +static int free_device_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *dev = ptr; + v4l2_loopback_remove(dev); + return 0; +} +static void free_devices(void) +{ + idr_for_each(&v4l2loopback_index_idr, &free_device_cb, NULL); + idr_destroy(&v4l2loopback_index_idr); +} + +static int __init v4l2loopback_init_module(void) +{ + int err; + int i; + MARK(); + + err = misc_register(&v4l2loopback_misc); + if (err < 0) + return err; + + if (devices < 0) { + devices = 1; + + /* try guessing the devices from the "video_nr" parameter */ + for (i = MAX_DEVICES - 1; i >= 0; i--) { + if (video_nr[i] >= 0) { + devices = i + 1; + break; + } + } + } + + if (devices > MAX_DEVICES) { + devices = MAX_DEVICES; + printk(KERN_INFO + "v4l2loopback: number of initial devices is limited to: %d\n", + MAX_DEVICES); + } + + if (max_buffers > MAX_BUFFERS) { + max_buffers = MAX_BUFFERS; + printk(KERN_INFO + "v4l2loopback: number of buffers is limited to: %d\n", + MAX_BUFFERS); + } + + if (max_openers < 0) { + printk(KERN_INFO + "v4l2loopback: allowing %d openers rather than %d\n", + 2, max_openers); + max_openers = 2; + } + + if (max_width < 1) { + max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; + printk(KERN_INFO "v4l2loopback: using max_width %d\n", + max_width); + } + if (max_height < 1) { + max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; + printk(KERN_INFO "v4l2loopback: using max_height %d\n", + max_height); + } + + /* kfree on module release */ + for (i = 0; i < devices; i++) { + struct v4l2_loopback_config cfg = { + // clang-format off + .output_nr = video_nr[i], + .capture_nr = video_nr[i], + .max_width = max_width, + .max_height = max_height, + .announce_all_caps = (!exclusive_caps[i]), + .max_buffers = max_buffers, + .max_openers = max_openers, + .debug = debug, + // clang-format on + }; + cfg.card_label[0] = 0; + if (card_label[i]) + snprintf(cfg.card_label, sizeof(cfg.card_label), "%s", + card_label[i]); + err = v4l2_loopback_add(&cfg, 0); + if (err) { + free_devices(); + goto error; + } + } + + dprintk("module installed\n"); + + printk(KERN_INFO "v4l2loopback driver version %d.%d.%d%s loaded\n", + // clang-format off + (V4L2LOOPBACK_VERSION_CODE >> 16) & 0xff, + (V4L2LOOPBACK_VERSION_CODE >> 8) & 0xff, + (V4L2LOOPBACK_VERSION_CODE ) & 0xff, +#ifdef SNAPSHOT_VERSION + " (" STRINGIFY2(SNAPSHOT_VERSION) ")" +#else + "" +#endif + ); + // clang-format on + + return 0; +error: + misc_deregister(&v4l2loopback_misc); + return err; +} + +#ifdef MODULE +static void v4l2loopback_cleanup_module(void) +{ + MARK(); + /* unregister the device -> it deletes /dev/video* */ + free_devices(); + /* and get rid of /dev/v4l2loopback */ + misc_deregister(&v4l2loopback_misc); + dprintk("module removed\n"); +} +#endif + +MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR); + +module_init(v4l2loopback_init_module); +module_exit(v4l2loopback_cleanup_module); + +/* + * fake usage of unused functions + 
*/ +#ifdef HAVE__V4L2_CTRLS +static int vidioc_queryctrl(struct file *file, void *fh, + struct v4l2_queryctrl *q) __attribute__((unused)); +static int vidioc_g_ctrl(struct file *file, void *fh, struct v4l2_control *c) + __attribute__((unused)); +static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *c) + __attribute__((unused)); +#endif /* HAVE__V4L2_CTRLS */ diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h new file mode 100644 index 0000000000000..fb7180802c3f1 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * v4l2loopback.h + * + * Written by IOhannes m zmölnig, 7/1/20. + * + * Copyright 2020 by IOhannes m zmölnig. Redistribution of this file is + * permitted under the GNU General Public License. + */ +#ifndef _V4L2LOOPBACK_H +#define _V4L2LOOPBACK_H + +#define V4L2LOOPBACK_VERSION_MAJOR 0 +#define V4L2LOOPBACK_VERSION_MINOR 12 +#define V4L2LOOPBACK_VERSION_BUGFIX 5 + +/* /dev/v4l2loopback interface */ + +struct v4l2_loopback_config { + /** + * the device-number (/dev/video) + * V4L2LOOPBACK_CTL_ADD: + * setting this to a value<0, will allocate an available one + * if nr>=0 and the device already exists, the ioctl will EEXIST + * if output_nr and capture_nr are the same, only a single device will be created + * + * V4L2LOOPBACK_CTL_QUERY: + * either both output_nr and capture_nr must refer to the same loopback, + * or one (and only one) of them must be -1 + * + */ + int output_nr; + int capture_nr; + + /** + * a nice name for your device + * if (*card_label)==0, an automatic name is assigned + */ + char card_label[32]; + + /** + * maximum allowed frame size + * if too low, default values are used + */ + int max_width; + int max_height; + + /** + * whether to announce OUTPUT/CAPTURE capabilities exclusively + * for this device or not + * (!exclusive_caps) + * FIXXME: this ought to be removed (if superseded by output_nr vs capture_nr) + */ + int announce_all_caps; + + /** + * number of buffers to allocate for the queue + * if set to <=0, default values are used + */ + int max_buffers; + + /** + * how many consumers are allowed to open this device concurrently + * if set to <=0, default values are used + */ + int max_openers; + + /** + * set the debugging level for this device + */ + int debug; +}; + +/* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the + * to-be-created device set. + * if the ptr is NULL, a new device is created with default values at the driver's discretion. 
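+ *
+ * a minimal userspace sketch (illustrative only; error handling omitted):
+ *
+ *	struct v4l2_loopback_config cfg = { .output_nr = -1, .capture_nr = -1 };
+ *	int ctlfd = open("/dev/v4l2loopback", 0);
+ *	int nr = ioctl(ctlfd, V4L2LOOPBACK_CTL_ADD, &cfg);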
+ * + * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY, + * to get more information on the device) + */ +#define V4L2LOOPBACK_CTL_ADD 0x4C80 + +/* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set + * (the two values must either refer to video-devices associated with the same loopback device + * or exactly one of them must be <0 + */ +#define V4L2LOOPBACK_CTL_QUERY 0x4C82 + +/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */ +#define V4L2LOOPBACK_CTL_REMOVE 0x4C81 + +#endif /* _V4L2LOOPBACK_H */ diff --git a/drivers/media/v4l2-core/v4l2loopback_formats.h b/drivers/media/v4l2-core/v4l2loopback_formats.h new file mode 100644 index 0000000000000..0a4e458526b37 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback_formats.h @@ -0,0 +1,437 @@ +static const struct v4l2l_format formats[] = { +#ifndef V4L2_PIX_FMT_VP9 +#define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0') +#endif +#ifndef V4L2_PIX_FMT_HEVC +#define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C') +#endif + + /* here come the packed formats */ + { + .name = "32 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR32, + .depth = 32, + .flags = 0, + }, + { + .name = "32 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB32, + .depth = 32, + .flags = 0, + }, + { + .name = "24 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR24, + .depth = 24, + .flags = 0, + }, + { + .name = "24 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB24, + .depth = 24, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_RGBA32 + { + .name = "32 bpp RGBA", + .fourcc = V4L2_PIX_FMT_RGBA32, + .depth = 32, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_RGB332 + { + .name = "8 bpp RGB-3-3-2", + .fourcc = V4L2_PIX_FMT_RGB332, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB332 */ +#ifdef V4L2_PIX_FMT_RGB444 + { + .name = "16 bpp RGB (xxxxrrrr ggggbbbb)", + .fourcc = V4L2_PIX_FMT_RGB444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB444 */ +#ifdef V4L2_PIX_FMT_RGB555 + { + .name = "16 bpp RGB-5-5-5", + .fourcc = V4L2_PIX_FMT_RGB555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555 */ +#ifdef V4L2_PIX_FMT_RGB565 + { + .name = "16 bpp RGB-5-6-5", + .fourcc = V4L2_PIX_FMT_RGB565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565 */ +#ifdef V4L2_PIX_FMT_RGB555X + { + .name = "16 bpp RGB-5-5-5 BE", + .fourcc = V4L2_PIX_FMT_RGB555X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555X */ +#ifdef V4L2_PIX_FMT_RGB565X + { + .name = "16 bpp RGB-5-6-5 BE", + .fourcc = V4L2_PIX_FMT_RGB565X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565X */ +#ifdef V4L2_PIX_FMT_BGR666 + { + .name = "18 bpp BGR-6-6-6", + .fourcc = V4L2_PIX_FMT_BGR666, + .depth = 18, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_BGR666 */ + { + .name = "4:2:2, packed, YUYV", + .fourcc = V4L2_PIX_FMT_YUYV, + .depth = 16, + .flags = 0, + }, + { + .name = "4:2:2, packed, UYVY", + .fourcc = V4L2_PIX_FMT_UYVY, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YVYU + { + .name = "4:2:2, packed YVYU", + .fourcc = V4L2_PIX_FMT_YVYU, + .depth = 16, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_VYUY + { + .name = "4:2:2, packed VYUY", + .fourcc = V4L2_PIX_FMT_VYUY, + .depth = 16, + .flags = 0, + }, +#endif + { + .name = "4:2:2, packed YYUV", + .fourcc = V4L2_PIX_FMT_YYUV, + .depth = 16, + .flags = 0, + }, + { + .name = "YUV-8-8-8-8", + .fourcc = V4L2_PIX_FMT_YUV32, + .depth = 32, + .flags = 0, + }, + { + .name = "8 bpp, Greyscale", + .fourcc = 
V4L2_PIX_FMT_GREY, + .depth = 8, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_Y4 + { + .name = "4 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y4, + .depth = 4, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y4 */ +#ifdef V4L2_PIX_FMT_Y6 + { + .name = "6 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y6, + .depth = 6, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y6 */ +#ifdef V4L2_PIX_FMT_Y10 + { + .name = "10 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y10, + .depth = 10, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y10 */ +#ifdef V4L2_PIX_FMT_Y12 + { + .name = "12 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y12, + .depth = 12, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y12 */ + { + .name = "16 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_Y16, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YUV444 + { + .name = "16 bpp xxxxyyyy uuuuvvvv", + .fourcc = V4L2_PIX_FMT_YUV444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV444 */ +#ifdef V4L2_PIX_FMT_YUV555 + { + .name = "16 bpp YUV-5-5-5", + .fourcc = V4L2_PIX_FMT_YUV555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV555 */ +#ifdef V4L2_PIX_FMT_YUV565 + { + .name = "16 bpp YUV-5-6-5", + .fourcc = V4L2_PIX_FMT_YUV565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV565 */ + +/* bayer formats */ +#ifdef V4L2_PIX_FMT_SRGGB8 + { + .name = "Bayer RGGB 8bit", + .fourcc = V4L2_PIX_FMT_SRGGB8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SRGGB8 */ +#ifdef V4L2_PIX_FMT_SGRBG8 + { + .name = "Bayer GRBG 8bit", + .fourcc = V4L2_PIX_FMT_SGRBG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGRBG8 */ +#ifdef V4L2_PIX_FMT_SGBRG8 + { + .name = "Bayer GBRG 8bit", + .fourcc = V4L2_PIX_FMT_SGBRG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGBRG8 */ +#ifdef V4L2_PIX_FMT_SBGGR8 + { + .name = "Bayer BA81 8bit", + .fourcc = V4L2_PIX_FMT_SBGGR8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SBGGR8 */ + + /* here come the planar formats */ + { + .name = "4:1:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:1:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#ifdef V4L2_PIX_FMT_YUV422P + { + .name = "16 bpp YVU422 planar", + .fourcc = V4L2_PIX_FMT_YUV422P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV422P */ +#ifdef V4L2_PIX_FMT_YUV411P + { + .name = "16 bpp YVU411 planar", + .fourcc = V4L2_PIX_FMT_YUV411P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV411P */ +#ifdef V4L2_PIX_FMT_Y41P + { + .name = "12 bpp YUV 4:1:1", + .fourcc = V4L2_PIX_FMT_Y41P, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_Y41P */ +#ifdef V4L2_PIX_FMT_NV12 + { + .name = "12 bpp Y/CbCr 4:2:0 ", + .fourcc = V4L2_PIX_FMT_NV12, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_NV12 */ + +/* here come the compressed formats */ + +#ifdef V4L2_PIX_FMT_MJPEG + { + .name = "Motion-JPEG", + .fourcc = V4L2_PIX_FMT_MJPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MJPEG */ +#ifdef V4L2_PIX_FMT_JPEG + { + .name = "JFIF JPEG", + .fourcc = V4L2_PIX_FMT_JPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, 
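+	/* note: the compressed entries in this table use a nominal depth of 32;
+	 * actual frame sizes are determined by the producing application */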
+#endif /* V4L2_PIX_FMT_JPEG */ +#ifdef V4L2_PIX_FMT_DV + { + .name = "DV1394", + .fourcc = V4L2_PIX_FMT_DV, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_DV */ +#ifdef V4L2_PIX_FMT_MPEG + { + .name = "MPEG-1/2/4 Multiplexed", + .fourcc = V4L2_PIX_FMT_MPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG */ +#ifdef V4L2_PIX_FMT_H264 + { + .name = "H264 with start codes", + .fourcc = V4L2_PIX_FMT_H264, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264 */ +#ifdef V4L2_PIX_FMT_H264_NO_SC + { + .name = "H264 without start codes", + .fourcc = V4L2_PIX_FMT_H264_NO_SC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_NO_SC */ +#ifdef V4L2_PIX_FMT_H264_MVC + { + .name = "H264 MVC", + .fourcc = V4L2_PIX_FMT_H264_MVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_MVC */ +#ifdef V4L2_PIX_FMT_H263 + { + .name = "H263", + .fourcc = V4L2_PIX_FMT_H263, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H263 */ +#ifdef V4L2_PIX_FMT_MPEG1 + { + .name = "MPEG-1 ES", + .fourcc = V4L2_PIX_FMT_MPEG1, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG1 */ +#ifdef V4L2_PIX_FMT_MPEG2 + { + .name = "MPEG-2 ES", + .fourcc = V4L2_PIX_FMT_MPEG2, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG2 */ +#ifdef V4L2_PIX_FMT_MPEG4 + { + .name = "MPEG-4 part 2 ES", + .fourcc = V4L2_PIX_FMT_MPEG4, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG4 */ +#ifdef V4L2_PIX_FMT_XVID + { + .name = "Xvid", + .fourcc = V4L2_PIX_FMT_XVID, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_XVID */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_G + { + .name = "SMPTE 421M Annex G compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_G, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_G */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_L + { + .name = "SMPTE 421M Annex L compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_L, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_L */ +#ifdef V4L2_PIX_FMT_VP8 + { + .name = "VP8", + .fourcc = V4L2_PIX_FMT_VP8, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP8 */ +#ifdef V4L2_PIX_FMT_VP9 + { + .name = "VP9", + .fourcc = V4L2_PIX_FMT_VP9, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP9 */ +#ifdef V4L2_PIX_FMT_HEVC + { + .name = "HEVC", + .fourcc = V4L2_PIX_FMT_HEVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_HEVC */ +}; diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c index 762d0c0f0716f..ecc78d6f89ed2 100644 --- a/drivers/memory/emif.c +++ b/drivers/memory/emif.c @@ -1025,7 +1025,7 @@ static struct emif_data *__init_or_module get_device_details( temp = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL); dev_info = devm_kzalloc(dev, sizeof(*dev_info), GFP_KERNEL); - if (!emif || !pd || !dev_info) { + if (!emif || !temp || !dev_info) { dev_err(dev, "%s:%d: allocation error\n", __func__, __LINE__); goto error; } @@ -1117,7 +1117,7 @@ static int __init_or_module emif_probe(struct platform_device *pdev) { struct emif_data *emif; struct resource *res; - int irq; + int irq, ret; if (pdev->dev.of_node) emif = of_get_memory_device_details(pdev->dev.of_node, &pdev->dev); @@ -1147,7 +1147,9 @@ static int 
__init_or_module emif_probe(struct platform_device *pdev) emif_onetime_settings(emif); emif_debugfs_init(emif); disable_and_clear_all_interrupts(emif); - setup_interrupts(emif, irq); + ret = setup_interrupts(emif, irq); + if (ret) + goto error; /* One-time actions taken on probing the first device */ if (!emif1) { diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c index 497b6edbf3ca1..25ba3c5e4ad6a 100644 --- a/drivers/memory/tegra/tegra20-emc.c +++ b/drivers/memory/tegra/tegra20-emc.c @@ -540,7 +540,7 @@ static int emc_read_lpddr_mode_register(struct tegra_emc *emc, unsigned int register_addr, unsigned int *register_data) { - u32 memory_dev = emem_dev + 1; + u32 memory_dev = emem_dev ? 1 : 2; u32 val, mr_mask = 0xff; int err; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index c0450397b6735..7ea312f0840e0 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -186,13 +186,8 @@ static int mspro_block_bd_open(struct block_device *bdev, fmode_t mode) mutex_lock(&mspro_block_disk_lock); - if (msb && msb->card) { + if (msb && msb->card) msb->usage_count++; - if ((mode & FMODE_WRITE) && msb->read_only) - rc = -EROFS; - else - rc = 0; - } mutex_unlock(&mspro_block_disk_lock); @@ -1239,6 +1234,9 @@ static int mspro_block_init_disk(struct memstick_dev *card) set_capacity(msb->disk, capacity); dev_dbg(&card->dev, "capacity set %ld\n", capacity); + if (msb->read_only) + set_disk_ro(msb->disk, true); + rc = device_add_disk(&card->dev, msb->disk, NULL); if (rc) goto out_cleanup_disk; diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index 8d58c8df46cfb..56338f9dbd0ba 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -906,14 +906,14 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, ret = mfd_add_devices(&pdev->dev, pdev->id, &asic3_cell_ds1wm, 1, mem, asic->irq_base, NULL); if (ret < 0) - goto out; + goto out_unmap; } if (mem_sdio && (irq >= 0)) { ret = mfd_add_devices(&pdev->dev, pdev->id, &asic3_cell_mmc, 1, mem_sdio, irq, NULL); if (ret < 0) - goto out; + goto out_unmap; } ret = 0; @@ -927,8 +927,12 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, ret = mfd_add_devices(&pdev->dev, 0, asic3_cell_leds, ASIC3_NUM_LEDS, NULL, 0, NULL); } + return ret; - out: +out_unmap: + if (asic->tmio_cnf) + iounmap(asic->tmio_cnf); +out: return ret; } diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index 8a4f1d90dcfd1..1000572761a84 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -323,8 +323,10 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, adc1 |= MC13783_ADC1_ATOX; dev_dbg(mc13xxx->dev, "%s: request irq\n", __func__); - mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE, + ret = mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE, mc13xxx_handler_adcdone, __func__, &adcdone_data); + if (ret) + goto out; mc13xxx_reg_write(mc13xxx, MC13XXX_ADC0, adc0); mc13xxx_reg_write(mc13xxx, MC13XXX_ADC1, adc1); diff --git a/drivers/misc/cardreader/alcor_pci.c b/drivers/misc/cardreader/alcor_pci.c index de6d44a158bba..3f514d77a843f 100644 --- a/drivers/misc/cardreader/alcor_pci.c +++ b/drivers/misc/cardreader/alcor_pci.c @@ -266,7 +266,7 @@ static int alcor_pci_probe(struct pci_dev *pdev, if (!priv) return -ENOMEM; - ret = ida_simple_get(&alcor_pci_idr, 0, 0, GFP_KERNEL); + ret = ida_alloc(&alcor_pci_idr, GFP_KERNEL); if (ret < 0) return ret; priv->id = ret; @@ -280,7 +280,8 @@ static int 
alcor_pci_probe(struct pci_dev *pdev, ret = pci_request_regions(pdev, DRV_NAME_ALCOR_PCI); if (ret) { dev_err(&pdev->dev, "Cannot request region\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error_free_ida; } if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { @@ -324,6 +325,8 @@ static int alcor_pci_probe(struct pci_dev *pdev, error_release_regions: pci_release_regions(pdev); +error_free_ida: + ida_free(&alcor_pci_idr, priv->id); return ret; } @@ -337,7 +340,7 @@ static void alcor_pci_remove(struct pci_dev *pdev) mfd_remove_devices(&pdev->dev); - ida_simple_remove(&alcor_pci_idr, priv->id); + ida_free(&alcor_pci_idr, priv->id); pci_release_regions(pdev); pci_set_drvdata(pdev, NULL); diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index fc084ee5106ec..09001fd9db85f 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -890,6 +890,8 @@ static ssize_t hl_set_power_state(struct file *f, const char __user *buf, pci_set_power_state(hdev->pdev, PCI_D0); pci_restore_state(hdev->pdev); rc = pci_enable_device(hdev->pdev); + if (rc < 0) + return rc; } else if (value == 2) { pci_save_state(hdev->pdev); pci_disable_device(hdev->pdev); diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 67c5b452dd356..88b91ad8e5413 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -1070,10 +1070,10 @@ static int kgdbts_option_setup(char *opt) { if (strlen(opt) >= MAX_CONFIG_LEN) { printk(KERN_ERR "kgdbts: config string too long\n"); - return -ENOSPC; + return 1; } strcpy(config, opt); - return 0; + return 1; } __setup("kgdbts=", kgdbts_option_setup); diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 67bb6a25fd0a0..64ce3f830262b 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -107,6 +107,7 @@ #define MEI_DEV_ID_ADP_S 0x7AE8 /* Alder Lake Point S */ #define MEI_DEV_ID_ADP_LP 0x7A60 /* Alder Lake Point LP */ #define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */ +#define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */ /* * MEI HW Section @@ -120,6 +121,7 @@ #define PCI_CFG_HFS_2 0x48 #define PCI_CFG_HFS_3 0x60 # define PCI_CFG_HFS_3_FW_SKU_MSK 0x00000070 +# define PCI_CFG_HFS_3_FW_SKU_IGN 0x00000000 # define PCI_CFG_HFS_3_FW_SKU_SPS 0x00000060 #define PCI_CFG_HFS_4 0x64 #define PCI_CFG_HFS_5 0x68 diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index d3a6c07286451..fbc4c95818645 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1405,16 +1405,16 @@ static bool mei_me_fw_type_sps_4(const struct pci_dev *pdev) .quirk_probe = mei_me_fw_type_sps_4 /** - * mei_me_fw_type_sps() - check for sps sku + * mei_me_fw_type_sps_ign() - check for sps or ign sku * - * Read ME FW Status register to check for SPS Firmware. - * The SPS FW is only signaled in pci function 0 + * Read ME FW Status register to check for SPS or IGN Firmware. 
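+ * (illustrative: the sku is decoded as HFS_3 & PCI_CFG_HFS_3_FW_SKU_MSK;
+ * per the defines above, 0x60 denotes SPS and 0x00 denotes IGN firmware)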
+ * The SPS/IGN FW is only signaled in pci function 0 * * @pdev: pci device * - * Return: true in case of SPS firmware + * Return: true in case of SPS/IGN firmware */ -static bool mei_me_fw_type_sps(const struct pci_dev *pdev) +static bool mei_me_fw_type_sps_ign(const struct pci_dev *pdev) { u32 reg; u32 fw_type; @@ -1427,14 +1427,15 @@ static bool mei_me_fw_type_sps(const struct pci_dev *pdev) dev_dbg(&pdev->dev, "fw type is %d\n", fw_type); - return fw_type == PCI_CFG_HFS_3_FW_SKU_SPS; + return fw_type == PCI_CFG_HFS_3_FW_SKU_IGN || + fw_type == PCI_CFG_HFS_3_FW_SKU_SPS; } #define MEI_CFG_KIND_ITOUCH \ .kind = "itouch" -#define MEI_CFG_FW_SPS \ - .quirk_probe = mei_me_fw_type_sps +#define MEI_CFG_FW_SPS_IGN \ + .quirk_probe = mei_me_fw_type_sps_ign #define MEI_CFG_FW_VER_SUPP \ .fw_ver_supported = 1 @@ -1535,7 +1536,7 @@ static const struct mei_cfg mei_me_pch12_sps_cfg = { MEI_CFG_PCH8_HFS, MEI_CFG_FW_VER_SUPP, MEI_CFG_DMA_128, - MEI_CFG_FW_SPS, + MEI_CFG_FW_SPS_IGN, }; /* Cannon Lake itouch with quirk for SPS 5.0 and newer Firmware exclusion @@ -1545,7 +1546,7 @@ static const struct mei_cfg mei_me_pch12_itouch_sps_cfg = { MEI_CFG_KIND_ITOUCH, MEI_CFG_PCH8_HFS, MEI_CFG_FW_VER_SUPP, - MEI_CFG_FW_SPS, + MEI_CFG_FW_SPS_IGN, }; /* Tiger Lake and newer devices */ @@ -1562,7 +1563,7 @@ static const struct mei_cfg mei_me_pch15_sps_cfg = { MEI_CFG_FW_VER_SUPP, MEI_CFG_DMA_128, MEI_CFG_TRC, - MEI_CFG_FW_SPS, + MEI_CFG_FW_SPS_IGN, }; /* diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c index a67f4f2d33a93..0706322154cbe 100644 --- a/drivers/misc/mei/interrupt.c +++ b/drivers/misc/mei/interrupt.c @@ -424,31 +424,26 @@ int mei_irq_read_handler(struct mei_device *dev, list_for_each_entry(cl, &dev->file_list, link) { if (mei_cl_hbm_equal(cl, mei_hdr)) { cl_dbg(dev, cl, "got a message\n"); - break; + ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list); + goto reset_slots; } } /* if no recipient cl was found we assume corrupted header */ - if (&cl->link == &dev->file_list) { - /* A message for not connected fixed address clients - * should be silently discarded - * On power down client may be force cleaned, - * silently discard such messages - */ - if (hdr_is_fixed(mei_hdr) || - dev->dev_state == MEI_DEV_POWER_DOWN) { - mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length); - ret = 0; - goto reset_slots; - } - dev_err(dev->dev, "no destination client found 0x%08X\n", - dev->rd_msg_hdr[0]); - ret = -EBADMSG; - goto end; + /* A message for not connected fixed address clients + * should be silently discarded + * On power down client may be force cleaned, + * silently discard such messages + */ + if (hdr_is_fixed(mei_hdr) || + dev->dev_state == MEI_DEV_POWER_DOWN) { + mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length); + ret = 0; + goto reset_slots; } - - ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list); - + dev_err(dev->dev, "no destination client found 0x%08X\n", dev->rd_msg_hdr[0]); + ret = -EBADMSG; + goto end; reset_slots: /* reset the number of slots and header */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 3a45aaf002ac8..a738253dbd056 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -113,6 +113,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } diff 
--git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 096ae624be9aa..58a60afa650b6 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -34,13 +35,13 @@ static ssize_t type_show(struct device *dev, switch (card->type) { case MMC_TYPE_MMC: - return sprintf(buf, "MMC\n"); + return sysfs_emit(buf, "MMC\n"); case MMC_TYPE_SD: - return sprintf(buf, "SD\n"); + return sysfs_emit(buf, "SD\n"); case MMC_TYPE_SDIO: - return sprintf(buf, "SDIO\n"); + return sysfs_emit(buf, "SDIO\n"); case MMC_TYPE_SD_COMBO: - return sprintf(buf, "SDcombo\n"); + return sysfs_emit(buf, "SDcombo\n"); default: return -EFAULT; } diff --git a/drivers/mmc/core/bus.h b/drivers/mmc/core/bus.h index 8105852c4b62f..3996b191b68d1 100644 --- a/drivers/mmc/core/bus.h +++ b/drivers/mmc/core/bus.h @@ -9,6 +9,7 @@ #define _MMC_CORE_BUS_H #include +#include struct mmc_host; struct mmc_card; @@ -17,7 +18,7 @@ struct mmc_card; static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf) \ { \ struct mmc_card *card = mmc_dev_to_card(dev); \ - return sprintf(buf, fmt, args); \ + return sysfs_emit(buf, fmt, args); \ } \ static DEVICE_ATTR(name, S_IRUGO, mmc_##name##_show, NULL) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index cf140f4ec8643..d739e2b631fe8 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -588,6 +588,16 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) EXPORT_SYMBOL(mmc_alloc_host); +static int mmc_validate_host_caps(struct mmc_host *host) +{ + if (host->caps & MMC_CAP_SDIO_IRQ && !host->ops->enable_sdio_irq) { + dev_warn(host->parent, "missing ->enable_sdio_irq() ops\n"); + return -EINVAL; + } + + return 0; +} + /** * mmc_add_host - initialise host hardware * @host: mmc host @@ -600,8 +610,9 @@ int mmc_add_host(struct mmc_host *host) { int err; - WARN_ON((host->caps & MMC_CAP_SDIO_IRQ) && - !host->ops->enable_sdio_irq); + err = mmc_validate_host_caps(host); + if (err) + return err; err = device_add(&host->class_dev); if (err) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 8421519c2a983..43d1b9b2fa499 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -812,12 +813,11 @@ static ssize_t mmc_fwrev_show(struct device *dev, { struct mmc_card *card = mmc_dev_to_card(dev); - if (card->ext_csd.rev < 7) { - return sprintf(buf, "0x%x\n", card->cid.fwrev); - } else { - return sprintf(buf, "0x%*phN\n", MMC_FIRMWARE_LEN, - card->ext_csd.fwrev); - } + if (card->ext_csd.rev < 7) + return sysfs_emit(buf, "0x%x\n", card->cid.fwrev); + else + return sysfs_emit(buf, "0x%*phN\n", MMC_FIRMWARE_LEN, + card->ext_csd.fwrev); } static DEVICE_ATTR(fwrev, S_IRUGO, mmc_fwrev_show, NULL); @@ -830,10 +830,10 @@ static ssize_t mmc_dsr_show(struct device *dev, struct mmc_host *host = card->host; if (card->csd.dsr_imp && host->dsr_req) - return sprintf(buf, "0x%x\n", host->dsr); + return sysfs_emit(buf, "0x%x\n", host->dsr); else /* return default DSR value */ - return sprintf(buf, "0x%x\n", 0x404); + return sysfs_emit(buf, "0x%x\n", 0x404); } static DEVICE_ATTR(dsr, S_IRUGO, mmc_dsr_show, NULL); diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index bfbfed30dc4d8..68df6b2f49cc7 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -708,18 +709,16 @@ MMC_DEV_ATTR(ocr, 
"0x%08x\n", card->ocr); MMC_DEV_ATTR(rca, "0x%04x\n", card->rca); -static ssize_t mmc_dsr_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t mmc_dsr_show(struct device *dev, struct device_attribute *attr, + char *buf) { - struct mmc_card *card = mmc_dev_to_card(dev); - struct mmc_host *host = card->host; - - if (card->csd.dsr_imp && host->dsr_req) - return sprintf(buf, "0x%x\n", host->dsr); - else - /* return default DSR value */ - return sprintf(buf, "0x%x\n", 0x404); + struct mmc_card *card = mmc_dev_to_card(dev); + struct mmc_host *host = card->host; + + if (card->csd.dsr_imp && host->dsr_req) + return sysfs_emit(buf, "0x%x\n", host->dsr); + /* return default DSR value */ + return sysfs_emit(buf, "0x%x\n", 0x404); } static DEVICE_ATTR(dsr, S_IRUGO, mmc_dsr_show, NULL); @@ -735,9 +734,9 @@ static ssize_t info##num##_show(struct device *dev, struct device_attribute *att \ if (num > card->num_info) \ return -ENODATA; \ - if (!card->info[num-1][0]) \ + if (!card->info[num - 1][0]) \ return 0; \ - return sprintf(buf, "%s\n", card->info[num-1]); \ + return sysfs_emit(buf, "%s\n", card->info[num - 1]); \ } \ static DEVICE_ATTR_RO(info##num) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 41164748723d2..25799accf8a02 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -40,9 +41,9 @@ static ssize_t info##num##_show(struct device *dev, struct device_attribute *att \ if (num > card->num_info) \ return -ENODATA; \ - if (!card->info[num-1][0]) \ + if (!card->info[num - 1][0]) \ return 0; \ - return sprintf(buf, "%s\n", card->info[num-1]); \ + return sysfs_emit(buf, "%s\n", card->info[num - 1]); \ } \ static DEVICE_ATTR_RO(info##num) diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index fda03b35c14a5..c6268c38c69e5 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -35,7 +36,7 @@ field##_show(struct device *dev, struct device_attribute *attr, char *buf) \ struct sdio_func *func; \ \ func = dev_to_sdio_func (dev); \ - return sprintf(buf, format_string, args); \ + return sysfs_emit(buf, format_string, args); \ } \ static DEVICE_ATTR_RO(field) @@ -52,9 +53,9 @@ static ssize_t info##num##_show(struct device *dev, struct device_attribute *att \ if (num > func->num_info) \ return -ENODATA; \ - if (!func->info[num-1][0]) \ + if (!func->info[num - 1][0]) \ return 0; \ - return sprintf(buf, "%s\n", func->info[num-1]); \ + return sysfs_emit(buf, "%s\n", func->info[num - 1]); \ } \ static DEVICE_ATTR_RO(info##num) diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index 2a757c88f9d21..80de660027d89 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -1375,8 +1375,12 @@ static int davinci_mmcsd_suspend(struct device *dev) static int davinci_mmcsd_resume(struct device *dev) { struct mmc_davinci_host *host = dev_get_drvdata(dev); + int ret; + + ret = clk_enable(host->clk); + if (ret) + return ret; - clk_enable(host->clk); mmc_davinci_reset_ctrl(host, 0); return 0; diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index 58cfaffa3c2d8..f7c384db89bf3 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -1495,12 +1495,12 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev) realtek_init_host(host); - if (pcr->rtd3_en) { - 
pm_runtime_set_autosuspend_delay(&pdev->dev, 5000); - pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_enable(&pdev->dev); - } - + pm_runtime_no_callbacks(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, 200); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_use_autosuspend(&pdev->dev); mmc_add_host(mmc); @@ -1521,11 +1521,6 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev) pcr->slots[RTSX_SD_CARD].card_event = NULL; mmc = host->mmc; - if (pcr->rtd3_en) { - pm_runtime_dont_use_autosuspend(&pdev->dev); - pm_runtime_disable(&pdev->dev); - } - cancel_work_sync(&host->work); mutex_lock(&host->host_mutex); @@ -1548,6 +1543,9 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev) flush_work(&host->work); + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_disable(&pdev->dev); + mmc_free_host(mmc); dev_dbg(&(pdev->dev), diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index f654afbe8e83c..b4891bb266485 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -514,26 +514,6 @@ static const struct sdhci_am654_driver_data sdhci_j721e_4bit_drvdata = { .flags = IOMUX_PRESENT, }; -static const struct sdhci_pltfm_data sdhci_am64_8bit_pdata = { - .ops = &sdhci_j721e_8bit_ops, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, -}; - -static const struct sdhci_am654_driver_data sdhci_am64_8bit_drvdata = { - .pdata = &sdhci_am64_8bit_pdata, - .flags = DLL_PRESENT | DLL_CALIB, -}; - -static const struct sdhci_pltfm_data sdhci_am64_4bit_pdata = { - .ops = &sdhci_j721e_4bit_ops, - .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, -}; - -static const struct sdhci_am654_driver_data sdhci_am64_4bit_drvdata = { - .pdata = &sdhci_am64_4bit_pdata, - .flags = IOMUX_PRESENT, -}; - static const struct soc_device_attribute sdhci_am654_devices[] = { { .family = "AM65X", .revision = "SR1.0", @@ -759,11 +739,11 @@ static const struct of_device_id sdhci_am654_of_match[] = { }, { .compatible = "ti,am64-sdhci-8bit", - .data = &sdhci_am64_8bit_drvdata, + .data = &sdhci_j721e_8bit_drvdata, }, { .compatible = "ti,am64-sdhci-4bit", - .data = &sdhci_am64_4bit_drvdata, + .data = &sdhci_j721e_4bit_drvdata, }, { /* sentinel */ } }; diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c index a8b31bddf14b8..1a840db207b5a 100644 --- a/drivers/mtd/devices/mchp23k256.c +++ b/drivers/mtd/devices/mchp23k256.c @@ -231,6 +231,19 @@ static const struct of_device_id mchp23k256_of_table[] = { }; MODULE_DEVICE_TABLE(of, mchp23k256_of_table); +static const struct spi_device_id mchp23k256_spi_ids[] = { + { + .name = "mchp23k256", + .driver_data = (kernel_ulong_t)&mchp23k256_caps, + }, + { + .name = "mchp23lcv1024", + .driver_data = (kernel_ulong_t)&mchp23lcv1024_caps, + }, + {} +}; +MODULE_DEVICE_TABLE(spi, mchp23k256_spi_ids); + static struct spi_driver mchp23k256_driver = { .driver = { .name = "mchp23k256", @@ -238,6 +251,7 @@ static struct spi_driver mchp23k256_driver = { }, .probe = mchp23k256_probe, .remove = mchp23k256_remove, + .id_table = mchp23k256_spi_ids, }; module_spi_driver(mchp23k256_driver); diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c index 231a107901960..b9cf2b4415a54 100644 --- a/drivers/mtd/devices/mchp48l640.c +++ b/drivers/mtd/devices/mchp48l640.c @@ -359,6 +359,15 @@ static const struct of_device_id mchp48l640_of_table[] = { }; MODULE_DEVICE_TABLE(of, mchp48l640_of_table); +static const 
struct spi_device_id mchp48l640_spi_ids[] = { + { + .name = "48l640", + .driver_data = (kernel_ulong_t)&mchp48l640_caps, + }, + {} +}; +MODULE_DEVICE_TABLE(spi, mchp48l640_spi_ids); + static struct spi_driver mchp48l640_driver = { .driver = { .name = "mchp48l640", @@ -366,6 +375,7 @@ static struct spi_driver mchp48l640_driver = { }, .probe = mchp48l640_probe, .remove = mchp48l640_remove, + .id_table = mchp48l640_spi_ids, }; module_spi_driver(mchp48l640_driver); diff --git a/drivers/mtd/nand/onenand/generic.c b/drivers/mtd/nand/onenand/generic.c index 8b6f4da5d7201..a4b8b65fe15f5 100644 --- a/drivers/mtd/nand/onenand/generic.c +++ b/drivers/mtd/nand/onenand/generic.c @@ -53,7 +53,12 @@ static int generic_onenand_probe(struct platform_device *pdev) } info->onenand.mmcontrol = pdata ? pdata->mmcontrol : NULL; - info->onenand.irq = platform_get_irq(pdev, 0); + + err = platform_get_irq(pdev, 0); + if (err < 0) + goto out_iounmap; + + info->onenand.irq = err; info->mtd.dev.parent = &pdev->dev; info->mtd.priv = &info->onenand; diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c index f3276ee9e4fe7..ddd93bc38ea6c 100644 --- a/drivers/mtd/nand/raw/atmel/nand-controller.c +++ b/drivers/mtd/nand/raw/atmel/nand-controller.c @@ -2060,13 +2060,15 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc, nc->mck = of_clk_get(dev->parent->of_node, 0); if (IS_ERR(nc->mck)) { dev_err(dev, "Failed to retrieve MCK clk\n"); - return PTR_ERR(nc->mck); + ret = PTR_ERR(nc->mck); + goto out_release_dma; } np = of_parse_phandle(dev->parent->of_node, "atmel,smc", 0); if (!np) { dev_err(dev, "Missing or invalid atmel,smc property\n"); - return -EINVAL; + ret = -EINVAL; + goto out_release_dma; } nc->smc = syscon_node_to_regmap(np); @@ -2074,10 +2076,16 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc, if (IS_ERR(nc->smc)) { ret = PTR_ERR(nc->smc); dev_err(dev, "Could not get SMC regmap (err = %d)\n", ret); - return ret; + goto out_release_dma; } return 0; + +out_release_dma: + if (nc->dmac) + dma_release_channel(nc->dmac); + + return ret; } static int diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index ded4df4739280..e50db25e5ddcb 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -648,6 +648,7 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, const struct nand_sdr_timings *sdr) { struct gpmi_nfc_hardware_timing *hw = &this->hw; + struct resources *r = &this->resources; unsigned int dll_threshold_ps = this->devdata->max_chain_delay; unsigned int period_ps, reference_period_ps; unsigned int data_setup_cycles, data_hold_cycles, addr_setup_cycles; @@ -671,6 +672,8 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY; } + hw->clk_rate = clk_round_rate(r->clock[0], hw->clk_rate); + /* SDR core timings are given in picoseconds */ period_ps = div_u64((u64)NSEC_PER_SEC * 1000, hw->clk_rate); diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index e7b2ba016d8c6..8daaba96edb2c 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -338,16 +338,19 @@ static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs) * * Return: -EBUSY if the chip has been suspended, 0 otherwise */ -static int nand_get_device(struct nand_chip *chip) +static void nand_get_device(struct nand_chip *chip) { 
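+	/* note: unlike the old implementation this never returns -EBUSY;
+	 * callers block here until nand_resume() wakes resume_wq */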
- mutex_lock(&chip->lock); - if (chip->suspended) { + /* Wait until the device is resumed. */ + while (1) { + mutex_lock(&chip->lock); + if (!chip->suspended) { + mutex_lock(&chip->controller->lock); + return; + } mutex_unlock(&chip->lock); - return -EBUSY; - } - mutex_lock(&chip->controller->lock); - return 0; + wait_event(chip->resume_wq, !chip->suspended); + } } /** @@ -576,9 +579,7 @@ static int nand_block_markbad_lowlevel(struct nand_chip *chip, loff_t ofs) nand_erase_nand(chip, &einfo, 0); /* Write bad block marker to OOB */ - ret = nand_get_device(chip); - if (ret) - return ret; + nand_get_device(chip); ret = nand_markbad_bbm(chip, ofs); nand_release_device(chip); @@ -3826,9 +3827,7 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from, ops->mode != MTD_OPS_RAW) return -ENOTSUPP; - ret = nand_get_device(chip); - if (ret) - return ret; + nand_get_device(chip); if (!ops->datbuf) ret = nand_do_read_oob(chip, from, ops); @@ -4415,13 +4414,11 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) { struct nand_chip *chip = mtd_to_nand(mtd); - int ret; + int ret = 0; ops->retlen = 0; - ret = nand_get_device(chip); - if (ret) - return ret; + nand_get_device(chip); switch (ops->mode) { case MTD_OPS_PLACE_OOB: @@ -4481,9 +4478,7 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr, return -EIO; /* Grab the lock and see if the device is available */ - ret = nand_get_device(chip); - if (ret) - return ret; + nand_get_device(chip); /* Shift to get first page */ page = (int)(instr->addr >> chip->page_shift); @@ -4570,7 +4565,7 @@ static void nand_sync(struct mtd_info *mtd) pr_debug("%s: called\n", __func__); /* Grab the lock and see if the device is available */ - WARN_ON(nand_get_device(chip)); + nand_get_device(chip); /* Release it and go back */ nand_release_device(chip); } @@ -4587,9 +4582,7 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs) int ret; /* Select the NAND device */ - ret = nand_get_device(chip); - if (ret) - return ret; + nand_get_device(chip); nand_select_target(chip, chipnr); @@ -4660,6 +4653,8 @@ static void nand_resume(struct mtd_info *mtd) __func__); } mutex_unlock(&chip->lock); + + wake_up_all(&chip->resume_wq); } /** @@ -5437,6 +5432,7 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, chip->cur_cs = -1; mutex_init(&chip->lock); + init_waitqueue_head(&chip->resume_wq); /* Enforce the right timings for reset/detection */ chip->current_interface_config = nand_get_reset_interface_config(); diff --git a/drivers/mtd/nand/raw/pl35x-nand-controller.c b/drivers/mtd/nand/raw/pl35x-nand-controller.c index 8a91e069ee2e9..3c6f6aff649f8 100644 --- a/drivers/mtd/nand/raw/pl35x-nand-controller.c +++ b/drivers/mtd/nand/raw/pl35x-nand-controller.c @@ -1062,7 +1062,7 @@ static int pl35x_nand_chip_init(struct pl35x_nandc *nfc, chip->controller = &nfc->controller; mtd = nand_to_mtd(chip); mtd->dev.parent = nfc->dev; - nand_set_flash_node(chip, nfc->dev->of_node); + nand_set_flash_node(chip, np); if (!mtd->name) { mtd->name = devm_kasprintf(nfc->dev, GFP_KERNEL, "%s", PL35X_NANDC_DRIVER_NAME); diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 04ea180118e33..cc155f6c6c68c 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -3181,10 +3181,11 @@ static void spi_nor_set_mtd_info(struct spi_nor *nor) mtd->flags = MTD_CAP_NORFLASH; if (nor->info->flags & SPI_NOR_NO_ERASE) mtd->flags |= MTD_NO_ERASE; + else + mtd->_erase = spi_nor_erase; mtd->writesize 
= nor->params->writesize; mtd->writebufsize = nor->params->page_size; mtd->size = nor->params->size; - mtd->_erase = spi_nor_erase; mtd->_read = spi_nor_read; /* Might be already set by some SST flashes. */ if (!mtd->_write) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index a7e3eb9befb62..a32050fecabf3 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -351,9 +351,6 @@ static ssize_t dev_attribute_show(struct device *dev, * we still can use 'ubi->ubi_num'. */ ubi = container_of(dev, struct ubi_device, dev); - ubi = ubi_get_device(ubi->ubi_num); - if (!ubi) - return -ENODEV; if (attr == &dev_eraseblock_size) ret = sprintf(buf, "%d\n", ubi->leb_size); @@ -382,7 +379,6 @@ static ssize_t dev_attribute_show(struct device *dev, else ret = -EINVAL; - ubi_put_device(ubi); return ret; } @@ -979,9 +975,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, goto out_detach; } - /* Make device "available" before it becomes accessible via sysfs */ - ubi_devices[ubi_num] = ubi; - err = uif_init(ubi); if (err) goto out_detach; @@ -1026,6 +1019,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, wake_up_process(ubi->bgt_thread); spin_unlock(&ubi->wl_lock); + ubi_devices[ubi_num] = ubi; ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; @@ -1034,7 +1028,6 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, out_uif: uif_close(ubi); out_detach: - ubi_devices[ubi_num] = NULL; ubi_wl_close(ubi); ubi_free_all_volumes(ubi); vfree(ubi->vtbl); diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 022af59906aa9..6b5f1ffd961b9 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -468,7 +468,9 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai, if (err == UBI_IO_FF_BITFLIPS) scrub = 1; - add_aeb(ai, free, pnum, ec, scrub); + ret = add_aeb(ai, free, pnum, ec, scrub); + if (ret) + goto out; continue; } else if (err == 0 || err == UBI_IO_BITFLIPS) { dbg_bld("Found non empty PEB:%i in pool", pnum); @@ -638,8 +640,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &ai->free, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 0); + ret = add_aeb(ai, &ai->free, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 0); + if (ret) + goto fail; } /* read EC values from used list */ @@ -649,8 +653,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &used, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 0); + ret = add_aeb(ai, &used, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 0); + if (ret) + goto fail; } /* read EC values from scrub list */ @@ -660,8 +666,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &used, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 1); + ret = add_aeb(ai, &used, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 1); + if (ret) + goto fail; } /* read EC values from erase list */ @@ -671,8 +679,10 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, if (fm_pos >= fm_size) goto fail_bad; - add_aeb(ai, &ai->erase, be32_to_cpu(fmec->pnum), - be32_to_cpu(fmec->ec), 1); + ret = add_aeb(ai, &ai->erase, be32_to_cpu(fmec->pnum), + be32_to_cpu(fmec->ec), 1); + if (ret) + goto fail; } ai->mean_ec = div_u64(ai->ec_sum, ai->ec_count); diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 139ee132bfbcf..1bc7b3a056046 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c 
@@ -56,16 +56,11 @@ static ssize_t vol_attribute_show(struct device *dev, { int ret; struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); - struct ubi_device *ubi; - - ubi = ubi_get_device(vol->ubi->ubi_num); - if (!ubi) - return -ENODEV; + struct ubi_device *ubi = vol->ubi; spin_lock(&ubi->volumes_lock); if (!ubi->volumes[vol->vol_id]) { spin_unlock(&ubi->volumes_lock); - ubi_put_device(ubi); return -ENODEV; } /* Take a reference to prevent volume removal */ @@ -103,7 +98,6 @@ static ssize_t vol_attribute_show(struct device *dev, vol->ref_count -= 1; ubi_assert(vol->ref_count >= 0); spin_unlock(&ubi->volumes_lock); - ubi_put_device(ubi); return ret; } diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index ba587e5fc24fc..683203f87ae2b 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -148,14 +148,14 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_mac_header(skb); - if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET) + if (!ipv6_mod_enabled() || family == AF_INET) err = IP_ECN_decapsulate(oiph, skb); else err = IP6_ECN_decapsulate(oiph, skb); if (unlikely(err)) { if (log_ecn_error) { - if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET) + if (!ipv6_mod_enabled() || family == AF_INET) net_info_ratelimited("non-ECT from %pI4 " "with TOS=%#x\n", &((struct iphdr *)oiph)->saddr, @@ -221,11 +221,12 @@ static struct socket *bareudp_create_sock(struct net *net, __be16 port) int err; memset(&udp_conf, 0, sizeof(udp_conf)); -#if IS_ENABLED(CONFIG_IPV6) - udp_conf.family = AF_INET6; -#else - udp_conf.family = AF_INET; -#endif + + if (ipv6_mod_enabled()) + udp_conf.family = AF_INET6; + else + udp_conf.family = AF_INET; + udp_conf.local_udp_port = port; /* Open UDP socket */ err = udp_sock_create(net, &udp_conf, &sock); @@ -448,7 +449,7 @@ static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) } rcu_read_lock(); - if (IS_ENABLED(CONFIG_IPV6) && info->mode & IP_TUNNEL_INFO_IPV6) + if (ipv6_mod_enabled() && info->mode & IP_TUNNEL_INFO_IPV6) err = bareudp6_xmit_skb(skb, dev, bareudp, info); else err = bareudp_xmit_skb(skb, dev, bareudp, info); @@ -478,7 +479,7 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, use_cache = ip_tunnel_dst_cache_usable(skb, info); - if (!IS_ENABLED(CONFIG_IPV6) || ip_tunnel_info_af(info) == AF_INET) { + if (!ipv6_mod_enabled() || ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; __be32 saddr; diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 1a4b56f6fa8c6..b3b5bc1c803b3 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1637,8 +1637,6 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) if (err) goto out_fail; - can_put_echo_skb(skb, dev, 0, 0); - if (cdev->can.ctrlmode & CAN_CTRLMODE_FD) { cccr = m_can_read(cdev, M_CAN_CCCR); cccr &= ~CCCR_CMR_MASK; @@ -1655,6 +1653,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) m_can_write(cdev, M_CAN_CCCR, cccr); } m_can_write(cdev, M_CAN_TXBTIE, 0x1); + + can_put_echo_skb(skb, dev, 0, 0); + m_can_write(cdev, M_CAN_TXBAR, 0x1); /* End of xmit function for version 3.0.x */ } else { diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index b5986df6eca0b..1c192554209a3 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1657,7 +1657,7 @@ mcp251xfd_register_get_dev_id(const 
struct mcp251xfd_priv *priv, out_kfree_buf_rx: kfree(buf_rx); - return 0; + return err; } #define MCP251XFD_QUIRK_ACTIVE(quirk) \ diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 7bedceffdfa36..bbec3311d8934 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -819,7 +819,6 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne usb_unanchor_urb(urb); usb_free_coherent(dev->udev, size, buf, urb->transfer_dma); - dev_kfree_skb(skb); atomic_dec(&dev->active_tx_urbs); diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 77bddff86252b..c45a814e1de2f 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -33,10 +33,6 @@ #define MCBA_USB_RX_BUFF_SIZE 64 #define MCBA_USB_TX_BUFF_SIZE (sizeof(struct mcba_usb_msg)) -/* MCBA endpoint numbers */ -#define MCBA_USB_EP_IN 1 -#define MCBA_USB_EP_OUT 1 - /* Microchip command id */ #define MBCA_CMD_RECEIVE_MESSAGE 0xE3 #define MBCA_CMD_I_AM_ALIVE_FROM_CAN 0xF5 @@ -83,6 +79,8 @@ struct mcba_priv { atomic_t free_ctx_cnt; void *rxbuf[MCBA_MAX_RX_URBS]; dma_addr_t rxbuf_dma[MCBA_MAX_RX_URBS]; + int rx_pipe; + int tx_pipe; }; /* CAN frame */ @@ -268,10 +266,8 @@ static netdev_tx_t mcba_usb_xmit(struct mcba_priv *priv, memcpy(buf, usb_msg, MCBA_USB_TX_BUFF_SIZE); - usb_fill_bulk_urb(urb, priv->udev, - usb_sndbulkpipe(priv->udev, MCBA_USB_EP_OUT), buf, - MCBA_USB_TX_BUFF_SIZE, mcba_usb_write_bulk_callback, - ctx); + usb_fill_bulk_urb(urb, priv->udev, priv->tx_pipe, buf, MCBA_USB_TX_BUFF_SIZE, + mcba_usb_write_bulk_callback, ctx); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &priv->tx_submitted); @@ -364,7 +360,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, xmit_failed: can_free_echo_skb(priv->netdev, ctx->ndx, NULL); mcba_usb_free_ctx(ctx); - dev_kfree_skb(skb); stats->tx_dropped++; return NETDEV_TX_OK; @@ -608,7 +603,7 @@ static void mcba_usb_read_bulk_callback(struct urb *urb) resubmit_urb: usb_fill_bulk_urb(urb, priv->udev, - usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_OUT), + priv->rx_pipe, urb->transfer_buffer, MCBA_USB_RX_BUFF_SIZE, mcba_usb_read_bulk_callback, priv); @@ -653,7 +648,7 @@ static int mcba_usb_start(struct mcba_priv *priv) urb->transfer_dma = buf_dma; usb_fill_bulk_urb(urb, priv->udev, - usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN), + priv->rx_pipe, buf, MCBA_USB_RX_BUFF_SIZE, mcba_usb_read_bulk_callback, priv); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; @@ -807,6 +802,13 @@ static int mcba_usb_probe(struct usb_interface *intf, struct mcba_priv *priv; int err; struct usb_device *usbdev = interface_to_usbdev(intf); + struct usb_endpoint_descriptor *in, *out; + + err = usb_find_common_endpoints(intf->cur_altsetting, &in, &out, NULL, NULL); + if (err) { + dev_err(&intf->dev, "Can't find endpoints\n"); + return err; + } netdev = alloc_candev(sizeof(struct mcba_priv), MCBA_MAX_TX_URBS); if (!netdev) { @@ -852,6 +854,9 @@ static int mcba_usb_probe(struct usb_interface *intf, goto cleanup_free_candev; } + priv->rx_pipe = usb_rcvbulkpipe(priv->udev, in->bEndpointAddress); + priv->tx_pipe = usb_sndbulkpipe(priv->udev, out->bEndpointAddress); + devm_can_led_init(netdev); /* Start USB dev only if we have successfully registered CAN device */ diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 431af1ec1e3ca..b638604bf1eef 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -663,9 +663,20 @@ static 
netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, atomic_inc(&priv->active_tx_urbs); err = usb_submit_urb(urb, GFP_ATOMIC); - if (unlikely(err)) - goto failed; - else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS) + if (unlikely(err)) { + can_free_echo_skb(netdev, context->echo_index, NULL); + + usb_unanchor_urb(urb); + usb_free_coherent(priv->udev, size, buf, urb->transfer_dma); + + atomic_dec(&priv->active_tx_urbs); + + if (err == -ENODEV) + netif_device_detach(netdev); + else + netdev_warn(netdev, "failed tx_urb %d\n", err); + stats->tx_dropped++; + } else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS) /* Slow down tx path */ netif_stop_queue(netdev); @@ -684,19 +695,6 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; -failed: - can_free_echo_skb(netdev, context->echo_index, NULL); - - usb_unanchor_urb(urb); - usb_free_coherent(priv->udev, size, buf, urb->transfer_dma); - - atomic_dec(&priv->active_tx_urbs); - - if (err == -ENODEV) - netif_device_detach(netdev); - else - netdev_warn(netdev, "failed tx_urb %d\n", err); - nomembuf: usb_free_urb(urb); diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index 47ccc15a3486b..191ffa7776e8d 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -148,7 +148,7 @@ static void vxcan_setup(struct net_device *dev) dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 0; - dev->flags = (IFF_NOARP|IFF_ECHO); + dev->flags = IFF_NOARP; dev->netdev_ops = &vxcan_netdev_ops; dev->needs_free_netdev = true; diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 0029d279616fd..37a3dabdce313 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -68,17 +68,7 @@ config NET_DSA_QCA8K This enables support for the Qualcomm Atheros QCA8K Ethernet switch chips. -config NET_DSA_REALTEK_SMI - tristate "Realtek SMI Ethernet switch family support" - select NET_DSA_TAG_RTL4_A - select NET_DSA_TAG_RTL8_4 - select FIXED_PHY - select IRQ_DOMAIN - select REALTEK_PHY - select REGMAP - help - This enables support for the Realtek SMI-based switch - chips, currently only RTL8366RB. 
+source "drivers/net/dsa/realtek/Kconfig" config NET_DSA_SMSC_LAN9303 tristate diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index 8da1569a34e6e..e73838c122560 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -9,8 +9,6 @@ obj-$(CONFIG_NET_DSA_LANTIQ_GSWIP) += lantiq_gswip.o obj-$(CONFIG_NET_DSA_MT7530) += mt7530.o obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o -obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek-smi.o -realtek-smi-objs := realtek-smi-core.o rtl8366.o rtl8366rb.o rtl8365mb.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303_I2C) += lan9303_i2c.o obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o @@ -23,5 +21,6 @@ obj-y += microchip/ obj-y += mv88e6xxx/ obj-y += ocelot/ obj-y += qca/ +obj-y += realtek/ obj-y += sja1105/ obj-y += xrs700x/ diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index a7e2fcf2df2c9..edbe5e7f1cb6b 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -567,14 +567,14 @@ static void bcm_sf2_cfp_slice_ipv6(struct bcm_sf2_priv *priv, static struct cfp_rule *bcm_sf2_cfp_rule_find(struct bcm_sf2_priv *priv, int port, u32 location) { - struct cfp_rule *rule = NULL; + struct cfp_rule *rule; list_for_each_entry(rule, &priv->cfp.rules_list, next) { if (rule->port == port && rule->fs.location == location) - break; + return rule; } - return rule; + return NULL; } static int bcm_sf2_cfp_rule_cmp(struct bcm_sf2_priv *priv, int port, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ab1676553714c..cf7754dddad78 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3639,6 +3639,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .port_sync_link = mv88e6185_port_sync_link, .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, .port_tag_remap = mv88e6095_port_tag_remap, + .port_set_policy = mv88e6352_port_set_policy, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_ucast_flood = mv88e6352_port_set_ucast_flood, .port_set_mcast_flood = mv88e6352_port_set_mcast_flood, diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig new file mode 100644 index 0000000000000..1c62212fb0ecb --- /dev/null +++ b/drivers/net/dsa/realtek/Kconfig @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0-only +menuconfig NET_DSA_REALTEK + tristate "Realtek Ethernet switch family support" + depends on NET_DSA + select NET_DSA_TAG_RTL4_A + select NET_DSA_TAG_RTL8_4 + select FIXED_PHY + select IRQ_DOMAIN + select REALTEK_PHY + select REGMAP + help + Select to enable support for Realtek Ethernet switch chips. + +config NET_DSA_REALTEK_SMI + tristate "Realtek SMI connected switch driver" + depends on NET_DSA_REALTEK + default y + help + Select to enable support for registering switches connected + through SMI. 
diff --git a/drivers/net/dsa/realtek/Makefile b/drivers/net/dsa/realtek/Makefile new file mode 100644 index 0000000000000..323b921bfce0f --- /dev/null +++ b/drivers/net/dsa/realtek/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek-smi.o +realtek-smi-objs := realtek-smi-core.o rtl8366.o rtl8366rb.o rtl8365mb.o diff --git a/drivers/net/dsa/realtek-smi-core.c b/drivers/net/dsa/realtek/realtek-smi-core.c similarity index 100% rename from drivers/net/dsa/realtek-smi-core.c rename to drivers/net/dsa/realtek/realtek-smi-core.c diff --git a/drivers/net/dsa/realtek-smi-core.h b/drivers/net/dsa/realtek/realtek-smi-core.h similarity index 99% rename from drivers/net/dsa/realtek-smi-core.h rename to drivers/net/dsa/realtek/realtek-smi-core.h index 5bfa53e2480ae..faed387d8db38 100644 --- a/drivers/net/dsa/realtek-smi-core.h +++ b/drivers/net/dsa/realtek/realtek-smi-core.h @@ -25,7 +25,7 @@ struct rtl8366_mib_counter { const char *name; }; -/** +/* * struct rtl8366_vlan_mc - Virtual LAN member configuration */ struct rtl8366_vlan_mc { @@ -74,7 +74,7 @@ struct realtek_smi { void *chip_data; /* Per-chip extra variant data */ }; -/** +/* * struct realtek_smi_ops - vtable for the per-SMI-chiptype operations * @detect: detects the chiptype */ diff --git a/drivers/net/dsa/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c similarity index 100% rename from drivers/net/dsa/rtl8365mb.c rename to drivers/net/dsa/realtek/rtl8365mb.c diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/realtek/rtl8366.c similarity index 100% rename from drivers/net/dsa/rtl8366.c rename to drivers/net/dsa/realtek/rtl8366.c diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c similarity index 99% rename from drivers/net/dsa/rtl8366rb.c rename to drivers/net/dsa/realtek/rtl8366rb.c index ecc19bd5115f0..4f8c06d7ab3a9 100644 --- a/drivers/net/dsa/rtl8366rb.c +++ b/drivers/net/dsa/realtek/rtl8366rb.c @@ -1252,6 +1252,8 @@ rtl8366rb_port_bridge_leave(struct dsa_switch *ds, int port, * @smi: SMI state container * @port: the port to drop untagged and C-tagged frames on * @drop: whether to drop or pass untagged and C-tagged frames + * + * Return: zero for success, a negative number on error. 
*/ static int rtl8366rb_drop_untagged(struct realtek_smi *smi, int port, bool drop) { diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index ff2d099aab218..53dc8d5fede86 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -696,6 +696,12 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, buf_pool->rx_skb[skb_index] = NULL; datalen = xgene_enet_get_data_len(le64_to_cpu(raw_desc->m1)); + + /* strip off CRC as HW isn't doing this */ + nv = GET_VAL(NV, le64_to_cpu(raw_desc->m0)); + if (!nv) + datalen -= 4; + skb_put(skb, datalen); prefetch(skb->data - NET_IP_ALIGN); skb->protocol = eth_type_trans(skb, ndev); @@ -717,12 +723,8 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, } } - nv = GET_VAL(NV, le64_to_cpu(raw_desc->m0)); - if (!nv) { - /* strip off CRC as HW isn't doing this */ - datalen -= 4; + if (!nv) goto skip_jumbo; - } slots = page_pool->slots - 1; head = page_pool->head; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 48520967746ff..c75c5ae64d5d8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -329,7 +329,7 @@ static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info, struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg, ptp_info); struct bnxt *bp = ptp->bp; - u8 pin_id; + int pin_id; int rc; switch (rq->type) { @@ -337,6 +337,8 @@ static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info, /* Configure an External PPS IN */ pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_EXTTS, rq->extts.index); + if (!TSIO_PIN_VALID(pin_id)) + return -EOPNOTSUPP; if (!on) break; rc = bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_PPS_IN); @@ -350,6 +352,8 @@ static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info, /* Configure a Periodic PPS OUT */ pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_PEROUT, rq->perout.index); + if (!TSIO_PIN_VALID(pin_id)) + return -EOPNOTSUPP; if (!on) break; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 7c528e1f8713e..8205140db829e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -31,7 +31,7 @@ struct pps_pin { u8 state; }; -#define TSIO_PIN_VALID(pin) ((pin) < (BNXT_MAX_TSIO_PINS)) +#define TSIO_PIN_VALID(pin) ((pin) >= 0 && (pin) < (BNXT_MAX_TSIO_PINS)) #define EVENT_DATA2_PPS_EVENT_TYPE(data2) \ ((data2) & ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 2da804f84b480..bd5998012a876 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -76,7 +76,7 @@ static inline void bcmgenet_writel(u32 value, void __iomem *offset) if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) __raw_writel(value, offset); else - writel_relaxed(value, offset); + writel(value, offset); } static inline u32 bcmgenet_readl(void __iomem *offset) @@ -84,7 +84,7 @@ static inline u32 bcmgenet_readl(void __iomem *offset) if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) return __raw_readl(offset); else - return readl_relaxed(offset); + return readl(offset); } static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv, diff --git 
a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index fa5b4f885b177..60ec64bfb3f0b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -674,7 +674,10 @@ static int enetc_get_ts_info(struct net_device *ndev, #ifdef CONFIG_FSL_ENETC_PTP_CLOCK info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RAW_HARDWARE; + SOF_TIMESTAMPING_RAW_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON) | diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 3555c12edb45a..d3d7172e0fcc5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -45,6 +45,7 @@ void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed) | pspeed); } +#define ENETC_QOS_ALIGN 64 static int enetc_setup_taprio(struct net_device *ndev, struct tc_taprio_qopt_offload *admin_conf) { @@ -52,10 +53,11 @@ static int enetc_setup_taprio(struct net_device *ndev, struct enetc_cbd cbd = {.cmd = 0}; struct tgs_gcl_conf *gcl_config; struct tgs_gcl_data *gcl_data; + dma_addr_t dma, dma_align; struct gce *gce; - dma_addr_t dma; u16 data_size; u16 gcl_len; + void *tmp; u32 tge; int err; int i; @@ -82,9 +84,16 @@ static int enetc_setup_taprio(struct net_device *ndev, gcl_config = &cbd.gcl_conf; data_size = struct_size(gcl_data, entry, gcl_len); - gcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!gcl_data) + tmp = dma_alloc_coherent(&priv->si->pdev->dev, + data_size + ENETC_QOS_ALIGN, + &dma, GFP_KERNEL); + if (!tmp) { + dev_err(&priv->si->pdev->dev, + "DMA mapping of taprio gate list failed!\n"); return -ENOMEM; + } + dma_align = ALIGN(dma, ENETC_QOS_ALIGN); + gcl_data = (struct tgs_gcl_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN); gce = (struct gce *)(gcl_data + 1); @@ -110,16 +119,8 @@ static int enetc_setup_taprio(struct net_device *ndev, cbd.length = cpu_to_le16(data_size); cbd.status_flags = 0; - dma = dma_map_single(&priv->si->pdev->dev, gcl_data, - data_size, DMA_TO_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(gcl_data); - return -ENOMEM; - } - - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); cbd.cls = BDCR_CMD_PORT_GCL; cbd.status_flags = 0; @@ -132,8 +133,8 @@ static int enetc_setup_taprio(struct net_device *ndev, ENETC_QBV_PTGCR_OFFSET, tge & (~ENETC_QBV_TGE)); - dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_TO_DEVICE); - kfree(gcl_data); + dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN, + tmp, dma); return err; } @@ -463,8 +464,9 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, struct enetc_cbd cbd = {.cmd = 0}; struct streamid_data *si_data; struct streamid_conf *si_conf; + dma_addr_t dma, dma_align; u16 data_size; - dma_addr_t dma; + void *tmp; int port; int err; @@ -485,21 +487,20 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct streamid_data); - si_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!si_data) + tmp = 
dma_alloc_coherent(&priv->si->pdev->dev, + data_size + ENETC_QOS_ALIGN, + &dma, GFP_KERNEL); + if (!tmp) { + dev_err(&priv->si->pdev->dev, + "DMA mapping of stream identify failed!\n"); return -ENOMEM; - cbd.length = cpu_to_le16(data_size); - - dma = dma_map_single(&priv->si->pdev->dev, si_data, - data_size, DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - err = -ENOMEM; - goto out; } + dma_align = ALIGN(dma, ENETC_QOS_ALIGN); + si_data = (struct streamid_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN); - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); + cbd.length = cpu_to_le16(data_size); + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); eth_broadcast_addr(si_data->dmac); si_data->vid_vidm_tg = (ENETC_CBDR_SID_VID_MASK + ((0x3 << 14) | ENETC_CBDR_SID_VIDM)); @@ -539,8 +540,8 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, cbd.length = cpu_to_le16(data_size); - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); /* VIDM default to be 1. * VID Match. If set (b1) then the VID must match, otherwise @@ -561,10 +562,8 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); out: - if (!dma_mapping_error(&priv->si->pdev->dev, dma)) - dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_FROM_DEVICE); - - kfree(si_data); + dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN, + tmp, dma); return err; } @@ -633,8 +632,9 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, { struct enetc_cbd cbd = { .cmd = 2 }; struct sfi_counter_data *data_buf; - dma_addr_t dma; + dma_addr_t dma, dma_align; u16 data_size; + void *tmp; int err; cbd.index = cpu_to_le16((u16)index); @@ -643,19 +643,19 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct sfi_counter_data); - data_buf = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!data_buf) + tmp = dma_alloc_coherent(&priv->si->pdev->dev, + data_size + ENETC_QOS_ALIGN, + &dma, GFP_KERNEL); + if (!tmp) { + dev_err(&priv->si->pdev->dev, + "DMA mapping of stream counter failed!\n"); return -ENOMEM; - - dma = dma_map_single(&priv->si->pdev->dev, data_buf, - data_size, DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - err = -ENOMEM; - goto exit; } - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); + dma_align = ALIGN(dma, ENETC_QOS_ALIGN); + data_buf = (struct sfi_counter_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN); + + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); cbd.length = cpu_to_le16(data_size); @@ -684,7 +684,9 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, data_buf->flow_meter_dropl; exit: - kfree(data_buf); + dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN, + tmp, dma); + return err; } @@ -723,9 +725,10 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, struct sgcl_conf *sgcl_config; struct sgcl_data *sgcl_data; struct sgce *sgce; - dma_addr_t dma; + dma_addr_t dma, dma_align; u16 data_size; int err, i; + void *tmp; u64 now; 
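+	/* Same trick as the taprio and stream-identify paths above: allocate
+	 * a coherent buffer padded by ENETC_QOS_ALIGN, then round the CPU
+	 * pointer (PTR_ALIGN) and the bus address (ALIGN) up together, so the
+	 * command BD is handed a 64-byte-aligned buffer instead of relying on
+	 * __GFP_DMA kzalloc() plus dma_map_single().
+	 */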
cbd.index = cpu_to_le16(sgi->index); @@ -772,24 +775,20 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, sgcl_config->acl_len = (sgi->num_entries - 1) & 0x3; data_size = struct_size(sgcl_data, sgcl, sgi->num_entries); - - sgcl_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); - if (!sgcl_data) - return -ENOMEM; - - cbd.length = cpu_to_le16(data_size); - - dma = dma_map_single(&priv->si->pdev->dev, - sgcl_data, data_size, - DMA_FROM_DEVICE); - if (dma_mapping_error(&priv->si->pdev->dev, dma)) { - netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(sgcl_data); + tmp = dma_alloc_coherent(&priv->si->pdev->dev, + data_size + ENETC_QOS_ALIGN, + &dma, GFP_KERNEL); + if (!tmp) { + dev_err(&priv->si->pdev->dev, + "DMA mapping of stream counter failed!\n"); return -ENOMEM; } + dma_align = ALIGN(dma, ENETC_QOS_ALIGN); + sgcl_data = (struct sgcl_data *)PTR_ALIGN(tmp, ENETC_QOS_ALIGN); - cbd.addr[0] = cpu_to_le32(lower_32_bits(dma)); - cbd.addr[1] = cpu_to_le32(upper_32_bits(dma)); + cbd.length = cpu_to_le16(data_size); + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); sgce = &sgcl_data->sgcl[0]; @@ -844,7 +843,8 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); exit: - kfree(sgcl_data); + dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN, + tmp, dma); return err; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 9298fbecb31ac..8184a954f6481 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -536,6 +536,8 @@ struct hnae3_ae_dev { * Get 1588 rx hwstamp * get_ts_info * Get phc info + * clean_vf_config + * Clean residual vf info after disable sriov */ struct hnae3_ae_ops { int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev); @@ -729,6 +731,7 @@ struct hnae3_ae_ops { struct ethtool_ts_info *info); int (*get_link_diagnosis_info)(struct hnae3_handle *handle, u32 *status_code); + void (*clean_vf_config)(struct hnae3_ae_dev *ae_dev, int num_vfs); }; struct hnae3_dcb_ops { @@ -841,6 +844,7 @@ struct hnae3_handle { struct dentry *hnae3_dbgfs; /* protects concurrent contention between debugfs commands */ struct mutex dbgfs_lock; + char **dbgfs_buf; /* Network interface message level enabled bits */ u32 msg_enable; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index f726a5b70f9e2..44d9b560b3374 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -1227,7 +1227,7 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, return ret; mutex_lock(&handle->dbgfs_lock); - save_buf = &hns3_dbg_cmd[index].buf; + save_buf = &handle->dbgfs_buf[index]; if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) || test_bit(HNS3_NIC_STATE_RESETTING, &priv->state)) { @@ -1332,6 +1332,13 @@ int hns3_dbg_init(struct hnae3_handle *handle) int ret; u32 i; + handle->dbgfs_buf = devm_kcalloc(&handle->pdev->dev, + ARRAY_SIZE(hns3_dbg_cmd), + sizeof(*handle->dbgfs_buf), + GFP_KERNEL); + if (!handle->dbgfs_buf) + return -ENOMEM; + hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry = debugfs_create_dir(name, hns3_dbgfs_root); handle->hnae3_dbgfs = hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry; @@ -1380,9 +1387,9 @@ void hns3_dbg_uninit(struct hnae3_handle *handle) u32 i; for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) - if 
(hns3_dbg_cmd[i].buf) { - kvfree(hns3_dbg_cmd[i].buf); - hns3_dbg_cmd[i].buf = NULL; + if (handle->dbgfs_buf[i]) { + kvfree(handle->dbgfs_buf[i]); + handle->dbgfs_buf[i] = NULL; } mutex_destroy(&handle->dbgfs_lock); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h index 83aa1450ab9fe..97578eabb7d8b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h @@ -49,7 +49,6 @@ struct hns3_dbg_cmd_info { enum hnae3_dbg_cmd cmd; enum hns3_dbg_dentry_type dentry; u32 buf_len; - char *buf; int (*init)(struct hnae3_handle *handle, unsigned int cmd); }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index babc5d7a3b526..f6082be7481c1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1028,46 +1028,56 @@ static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring, static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) { + u32 alloc_size = ring->tqp->handle->kinfo.tx_spare_buf_size; struct hns3_tx_spare *tx_spare; struct page *page; - u32 alloc_size; dma_addr_t dma; int order; - alloc_size = ring->tqp->handle->kinfo.tx_spare_buf_size; if (!alloc_size) return; order = get_order(alloc_size); + if (order >= MAX_ORDER) { + if (net_ratelimit()) + dev_warn(ring_to_dev(ring), "failed to allocate tx spare buffer, exceed to max order\n"); + return; + } + tx_spare = devm_kzalloc(ring_to_dev(ring), sizeof(*tx_spare), GFP_KERNEL); if (!tx_spare) { /* The driver can still work without the tx spare buffer */ dev_warn(ring_to_dev(ring), "failed to allocate hns3_tx_spare\n"); - return; + goto devm_kzalloc_error; } page = alloc_pages_node(dev_to_node(ring_to_dev(ring)), GFP_KERNEL, order); if (!page) { dev_warn(ring_to_dev(ring), "failed to allocate tx spare pages\n"); - devm_kfree(ring_to_dev(ring), tx_spare); - return; + goto alloc_pages_error; } dma = dma_map_page(ring_to_dev(ring), page, 0, PAGE_SIZE << order, DMA_TO_DEVICE); if (dma_mapping_error(ring_to_dev(ring), dma)) { dev_warn(ring_to_dev(ring), "failed to map pages for tx spare\n"); - put_page(page); - devm_kfree(ring_to_dev(ring), tx_spare); - return; + goto dma_mapping_error; } tx_spare->dma = dma; tx_spare->buf = page_address(page); tx_spare->len = PAGE_SIZE << order; ring->tx_spare = tx_spare; + return; + +dma_mapping_error: + put_page(page); +alloc_pages_error: + devm_kfree(ring_to_dev(ring), tx_spare); +devm_kzalloc_error: + ring->tqp->handle->kinfo.tx_spare_buf_size = 0; } /* Use hns3_tx_spare_space() to make sure there is enough buffer @@ -2982,6 +2992,21 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return ret; } +/** + * hns3_clean_vf_config + * @pdev: pointer to a pci_dev structure + * @num_vfs: number of VFs allocated + * + * Clean residual VF config after SR-IOV is disabled + **/ +static void hns3_clean_vf_config(struct pci_dev *pdev, int num_vfs) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); + + if (ae_dev->ops->clean_vf_config) + ae_dev->ops->clean_vf_config(ae_dev, num_vfs); +} + /* hns3_remove - Device removal routine * @pdev: PCI device information struct */ @@ -3020,7 +3045,10 @@ static int hns3_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) else return num_vfs; } else if (!pci_vfs_assigned(pdev)) { + int num_vfs_pre = pci_num_vf(pdev); + pci_disable_sriov(pdev); + hns3_clean_vf_config(pdev, num_vfs_pre); } else {
dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index c06c39ece80da..cbf36cc86803a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -651,8 +651,8 @@ static void hns3_get_ringparam(struct net_device *netdev, struct hnae3_handle *h = priv->ae_handle; int rx_queue_index = h->kinfo.num_tqps; - if (hns3_nic_resetting(netdev)) { - netdev_err(netdev, "dev resetting!"); + if (hns3_nic_resetting(netdev) || !priv->ring) { + netdev_err(netdev, "failed to get ringparam value, due to dev resetting or uninited\n"); return; } @@ -1072,8 +1072,14 @@ static int hns3_check_ringparam(struct net_device *ndev, { #define RX_BUF_LEN_2K 2048 #define RX_BUF_LEN_4K 4096 - if (hns3_nic_resetting(ndev)) + + struct hns3_nic_priv *priv = netdev_priv(ndev); + + if (hns3_nic_resetting(ndev) || !priv->ring) { + netdev_err(ndev, "failed to set ringparam value, due to dev resetting or uninited\n"); return -EBUSY; + } + if (param->rx_mini_pending || param->rx_jumbo_pending) return -EINVAL; @@ -1764,9 +1770,6 @@ static int hns3_set_tx_spare_buf_size(struct net_device *netdev, struct hnae3_handle *h = priv->ae_handle; int ret; - if (hns3_nic_resetting(netdev)) - return -EBUSY; - h->kinfo.tx_spare_buf_size = data; ret = hns3_reset_notify(h, HNAE3_DOWN_CLIENT); @@ -1797,6 +1800,11 @@ static int hns3_set_tunable(struct net_device *netdev, struct hnae3_handle *h = priv->ae_handle; int i, ret = 0; + if (hns3_nic_resetting(netdev) || !priv->ring) { + netdev_err(netdev, "failed to set tunable value, dev resetting!"); + return -EBUSY; + } + switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: priv->tx_copybreak = *(u32 *)data; @@ -1816,7 +1824,8 @@ static int hns3_set_tunable(struct net_device *netdev, old_tx_spare_buf_size = h->kinfo.tx_spare_buf_size; new_tx_spare_buf_size = *(u32 *)data; ret = hns3_set_tx_spare_buf_size(netdev, new_tx_spare_buf_size); - if (ret) { + if (ret || + (!priv->ring->tx_spare && new_tx_spare_buf_size != 0)) { int ret1; netdev_warn(netdev, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 24f7afacae028..e96bc61a0a877 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1863,6 +1863,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) vport->vf_info.link_state = IFLA_VF_LINK_STATE_AUTO; vport->mps = HCLGE_MAC_DEFAULT_FRAME; vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE; + vport->port_base_vlan_cfg.tbl_sta = true; vport->rxvlan_cfg.rx_vlan_offload_en = true; vport->req_vlan_fltr_en = true; INIT_LIST_HEAD(&vport->vlan_list); @@ -8429,12 +8430,11 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport, hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0); hclge_prepare_mac_addr(&req, addr, false); ret = hclge_remove_mac_vlan_tbl(vport, &req); - if (!ret) { + if (!ret || ret == -ENOENT) { mutex_lock(&hdev->vport_lock); hclge_update_umv_space(vport, true); mutex_unlock(&hdev->vport_lock); - } else if (ret == -ENOENT) { - ret = 0; + return 0; } return ret; @@ -8984,11 +8984,16 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, ether_addr_copy(vport->vf_info.mac, mac_addr); + /* there is a timewindow for PF to know VF unalive, it may + * cause send mailbox fail, but it doesn't matter, VF will + 
* query it when reinit. + */ if (test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) { dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %s, and it will be reinitialized!\n", vf, format_mac_addr); - return hclge_inform_reset_assert_to_vf(vport); + (void)hclge_inform_reset_assert_to_vf(vport); + return 0; } dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %s\n", @@ -9809,19 +9814,28 @@ static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, bool writen_to_tbl) { struct hclge_vport_vlan_cfg *vlan, *tmp; + struct hclge_dev *hdev = vport->back; - list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) - if (vlan->vlan_id == vlan_id) + mutex_lock(&hdev->vport_lock); + + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { + if (vlan->vlan_id == vlan_id) { + mutex_unlock(&hdev->vport_lock); return; + } + } vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); - if (!vlan) + if (!vlan) { + mutex_unlock(&hdev->vport_lock); return; + } vlan->hd_tbl_status = writen_to_tbl; vlan->vlan_id = vlan_id; list_add_tail(&vlan->node, &vport->vlan_list); + mutex_unlock(&hdev->vport_lock); } static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport) @@ -9830,6 +9844,8 @@ static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport) struct hclge_dev *hdev = vport->back; int ret; + mutex_lock(&hdev->vport_lock); + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (!vlan->hd_tbl_status) { ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), @@ -9839,12 +9855,16 @@ static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport) dev_err(&hdev->pdev->dev, "restore vport vlan list failed, ret=%d\n", ret); + + mutex_unlock(&hdev->vport_lock); return ret; } } vlan->hd_tbl_status = true; } + mutex_unlock(&hdev->vport_lock); + return 0; } @@ -9854,6 +9874,8 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, struct hclge_vport_vlan_cfg *vlan, *tmp; struct hclge_dev *hdev = vport->back; + mutex_lock(&hdev->vport_lock); + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (vlan->vlan_id == vlan_id) { if (is_write_tbl && vlan->hd_tbl_status) @@ -9868,6 +9890,8 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, break; } } + + mutex_unlock(&hdev->vport_lock); } void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) @@ -9875,6 +9899,8 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) struct hclge_vport_vlan_cfg *vlan, *tmp; struct hclge_dev *hdev = vport->back; + mutex_lock(&hdev->vport_lock); + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (vlan->hd_tbl_status) hclge_set_vlan_filter_hw(hdev, @@ -9890,6 +9916,7 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) } } clear_bit(vport->vport_id, hdev->vf_vlan_full); + mutex_unlock(&hdev->vport_lock); } void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) @@ -9898,6 +9925,8 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) struct hclge_vport *vport; int i; + mutex_lock(&hdev->vport_lock); + for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { @@ -9905,37 +9934,61 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) kfree(vlan); } } + + mutex_unlock(&hdev->vport_lock); } -void hclge_restore_vport_vlan_table(struct hclge_vport *vport) +void hclge_restore_vport_port_base_vlan_config(struct hclge_dev *hdev) { - struct 
hclge_vport_vlan_cfg *vlan, *tmp; - struct hclge_dev *hdev = vport->back; + struct hclge_vlan_info *vlan_info; + struct hclge_vport *vport; u16 vlan_proto; u16 vlan_id; u16 state; + int vf_id; int ret; - vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto; - vlan_id = vport->port_base_vlan_cfg.vlan_info.vlan_tag; - state = vport->port_base_vlan_cfg.state; + /* PF should restore all vfs port base vlan */ + for (vf_id = 0; vf_id < hdev->num_alloc_vfs; vf_id++) { + vport = &hdev->vport[vf_id + HCLGE_VF_VPORT_START_NUM]; + vlan_info = vport->port_base_vlan_cfg.tbl_sta ? + &vport->port_base_vlan_cfg.vlan_info : + &vport->port_base_vlan_cfg.old_vlan_info; - if (state != HNAE3_PORT_BASE_VLAN_DISABLE) { - clear_bit(vport->vport_id, hdev->vlan_table[vlan_id]); - hclge_set_vlan_filter_hw(hdev, htons(vlan_proto), - vport->vport_id, vlan_id, - false); - return; + vlan_id = vlan_info->vlan_tag; + vlan_proto = vlan_info->vlan_proto; + state = vport->port_base_vlan_cfg.state; + + if (state != HNAE3_PORT_BASE_VLAN_DISABLE) { + clear_bit(vport->vport_id, hdev->vlan_table[vlan_id]); + ret = hclge_set_vlan_filter_hw(hdev, htons(vlan_proto), + vport->vport_id, + vlan_id, false); + vport->port_base_vlan_cfg.tbl_sta = ret == 0; + } } +} - list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { - ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), - vport->vport_id, - vlan->vlan_id, false); - if (ret) - break; - vlan->hd_tbl_status = true; +void hclge_restore_vport_vlan_table(struct hclge_vport *vport) +{ + struct hclge_vport_vlan_cfg *vlan, *tmp; + struct hclge_dev *hdev = vport->back; + int ret; + + mutex_lock(&hdev->vport_lock); + + if (vport->port_base_vlan_cfg.state == HNAE3_PORT_BASE_VLAN_DISABLE) { + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { + ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), + vport->vport_id, + vlan->vlan_id, false); + if (ret) + break; + vlan->hd_tbl_status = true; + } } + + mutex_unlock(&hdev->vport_lock); } /* For global reset and imp reset, hardware will clear the mac table, @@ -9975,6 +10028,7 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev) struct hnae3_handle *handle = &vport->nic; hclge_restore_mac_table_common(vport); + hclge_restore_vport_port_base_vlan_config(hdev); hclge_restore_vport_vlan_table(vport); set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); hclge_restore_fd_entries(handle); @@ -10031,6 +10085,8 @@ static int hclge_update_vlan_filter_entries(struct hclge_vport *vport, false); } + vport->port_base_vlan_cfg.tbl_sta = false; + /* force add VLAN 0 */ ret = hclge_set_vf_vlan_common(hdev, vport->vport_id, false, 0); if (ret) @@ -10120,7 +10176,9 @@ int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state, else nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE; + vport->port_base_vlan_cfg.old_vlan_info = *old_vlan_info; vport->port_base_vlan_cfg.vlan_info = *vlan_info; + vport->port_base_vlan_cfg.tbl_sta = true; hclge_set_vport_vlan_fltr_change(vport); return 0; @@ -10188,14 +10246,17 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, return ret; } - /* for DEVICE_VERSION_V3, vf doesn't need to know about the port based + /* there is a timewindow for PF to know VF unalive, it may + * cause send mailbox fail, but it doesn't matter, VF will + * query it when reinit. + * for DEVICE_VERSION_V3, vf doesn't need to know about the port based * VLAN state. 
*/ if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 && test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) - hclge_push_vf_port_base_vlan_info(&hdev->vport[0], - vport->vport_id, state, - &vlan_info); + (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0], + vport->vport_id, + state, &vlan_info); return 0; } @@ -10253,11 +10314,11 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, } if (!ret) { - if (is_kill) - hclge_rm_vport_vlan_table(vport, vlan_id, false); - else + if (!is_kill) hclge_add_vport_vlan_table(vport, vlan_id, writen_to_tbl); + else if (is_kill && vlan_id != 0) + hclge_rm_vport_vlan_table(vport, vlan_id, false); } else if (is_kill) { /* when remove hw vlan filter failed, record the vlan id, * and try to remove it from hw later, to be consistent @@ -11831,8 +11892,8 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_misc_irq_uninit(hdev); hclge_devlink_uninit(hdev); hclge_pci_uninit(hdev); - mutex_destroy(&hdev->vport_lock); hclge_uninit_vport_vlan_table(hdev); + mutex_destroy(&hdev->vport_lock); ae_dev->priv = NULL; } @@ -12656,6 +12717,55 @@ static int hclge_get_link_diagnosis_info(struct hnae3_handle *handle, return 0; } +/* After SR-IOV is disabled, the VF still carries some config and info, + * set up by the PF, that needs to be cleaned. + */ +static void hclge_clear_vport_vf_info(struct hclge_vport *vport, int vfid) +{ + struct hclge_dev *hdev = vport->back; + struct hclge_vlan_info vlan_info; + int ret; + + /* after disabling SR-IOV, clean the VF rate configured by the PF */ + ret = hclge_tm_qs_shaper_cfg(vport, 0); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to clean vf%d rate config, ret = %d\n", + vfid, ret); + + vlan_info.vlan_tag = 0; + vlan_info.qos = 0; + vlan_info.vlan_proto = ETH_P_8021Q; + ret = hclge_update_port_base_vlan_cfg(vport, + HNAE3_PORT_BASE_VLAN_DISABLE, + &vlan_info); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to clean vf%d port base vlan, ret = %d\n", + vfid, ret); + + ret = hclge_set_vf_spoofchk_hw(hdev, vport->vport_id, false); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to clean vf%d spoof config, ret = %d\n", + vfid, ret); + + memset(&vport->vf_info, 0, sizeof(vport->vf_info)); +} + +static void hclge_clean_vport_config(struct hnae3_ae_dev *ae_dev, int num_vfs) +{ + struct hclge_dev *hdev = ae_dev->priv; + struct hclge_vport *vport; + int i; + + for (i = 0; i < num_vfs; i++) { + vport = &hdev->vport[i + HCLGE_VF_VPORT_START_NUM]; + + hclge_clear_vport_vf_info(vport, i); + } +} + static const struct hnae3_ae_ops hclge_ops = { .init_ae_dev = hclge_init_ae_dev, .uninit_ae_dev = hclge_uninit_ae_dev, @@ -12757,6 +12867,7 @@ static const struct hnae3_ae_ops hclge_ops = { .get_rx_hwts = hclge_ptp_get_rx_hwts, .get_ts_info = hclge_ptp_get_ts_info, .get_link_diagnosis_info = hclge_get_link_diagnosis_info, + .clean_vf_config = hclge_clean_vport_config, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index adfb26e792621..63197257dd4e4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -977,7 +977,9 @@ struct hclge_vlan_info { struct hclge_port_base_vlan_config { u16 state; + bool tbl_sta; struct hclge_vlan_info vlan_info; + struct hclge_vlan_info old_vlan_info; }; struct hclge_vf_info { @@ -1023,6 +1025,7 @@ struct hclge_vport { spinlock_t mac_list_lock; /* protect mac address need to add/delete */ struct list_head uc_mac_list;
/* Store VF unicast table */ struct list_head mc_mac_list; /* Store VF multicast table */ + struct list_head vlan_list; /* Store VF vlan table */ }; @@ -1097,6 +1100,7 @@ void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list, void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list); void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev); void hclge_restore_mac_table_common(struct hclge_vport *vport); +void hclge_restore_vport_port_base_vlan_config(struct hclge_dev *hdev); void hclge_restore_vport_vlan_table(struct hclge_vport *vport); int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state, struct hclge_vlan_info *vlan_info); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 63d2be4349e3e..03d63b6a9b2bc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -48,7 +48,7 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum, int ret; if (test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state)) - return 0; + return -EBUSY; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false); @@ -86,7 +86,7 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum) int ret; if (test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state)) - return 0; + return -EBUSY; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 21442a9bb9961..90c6197d9374c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2855,6 +2855,11 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) return ret; } + /* get current port based vlan state from PF */ + ret = hclgevf_get_port_base_vlan_filter_state(hdev); + if (ret) + return ret; + set_bit(HCLGEVF_STATE_PROMISC_CHANGED, &hdev->state); hclgevf_init_rxd_adv_layout(hdev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index b423e94956f10..b4804ce63151f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1429,6 +1429,15 @@ static int __ibmvnic_open(struct net_device *netdev) return rc; } + adapter->tx_queues_active = true; + + /* Since queues were stopped until now, there shouldn't be any + * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we + * don't need the synchronize_rcu()? Leaving it for consistency + * with setting ->tx_queues_active = false. + */ + synchronize_rcu(); + netif_tx_start_all_queues(netdev); if (prev_state == VNIC_CLOSED) { @@ -1603,6 +1612,14 @@ static void ibmvnic_cleanup(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); /* ensure that transmissions are stopped if called by do_reset */ + + adapter->tx_queues_active = false; + + /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active + * update so they don't restart a queue after we stop it below. 
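+	 * Both ibmvnic_xmit() and the queue-wake paths run under
+	 * rcu_read_lock() and re-check ->tx_queues_active, so once
+	 * synchronize_rcu() returns none of them can still wake a
+	 * queue that is about to be stopped.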
+ */ + synchronize_rcu(); + if (test_bit(0, &adapter->resetting)) netif_tx_disable(netdev); else @@ -1842,14 +1859,21 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, tx_buff->skb = NULL; adapter->netdev->stats.tx_dropped++; } + ind_bufp->index = 0; + if (atomic_sub_return(entries, &tx_scrq->used) <= (adapter->req_tx_entries_per_subcrq / 2) && - __netif_subqueue_stopped(adapter->netdev, queue_num) && - !test_bit(0, &adapter->resetting)) { - netif_wake_subqueue(adapter->netdev, queue_num); - netdev_dbg(adapter->netdev, "Started queue %d\n", - queue_num); + __netif_subqueue_stopped(adapter->netdev, queue_num)) { + rcu_read_lock(); + + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, queue_num); + netdev_dbg(adapter->netdev, "Started queue %d\n", + queue_num); + } + + rcu_read_unlock(); } } @@ -1904,11 +1928,12 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) int index = 0; u8 proto = 0; - tx_scrq = adapter->tx_scrq[queue_num]; - txq = netdev_get_tx_queue(netdev, queue_num); - ind_bufp = &tx_scrq->ind_buf; - - if (test_bit(0, &adapter->resetting)) { + /* If a reset is in progress, drop the packet since + * the scrqs may get torn down. Otherwise use the + * rcu to ensure reset waits for us to complete. + */ + rcu_read_lock(); + if (!adapter->tx_queues_active) { dev_kfree_skb_any(skb); tx_send_failed++; @@ -1917,6 +1942,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) goto out; } + tx_scrq = adapter->tx_scrq[queue_num]; + txq = netdev_get_tx_queue(netdev, queue_num); + ind_bufp = &tx_scrq->ind_buf; + if (ibmvnic_xmit_workarounds(skb, netdev)) { tx_dropped++; tx_send_failed++; @@ -1924,6 +1953,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } + if (skb_is_gso(skb)) tx_pool = &adapter->tso_pool[queue_num]; else @@ -2078,6 +2108,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) netif_carrier_off(netdev); } out: + rcu_read_unlock(); netdev->stats.tx_dropped += tx_dropped; netdev->stats.tx_bytes += tx_bytes; netdev->stats.tx_packets += tx_packets; @@ -3732,9 +3763,15 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, (adapter->req_tx_entries_per_subcrq / 2) && __netif_subqueue_stopped(adapter->netdev, scrq->pool_index)) { - netif_wake_subqueue(adapter->netdev, scrq->pool_index); - netdev_dbg(adapter->netdev, "Started queue %d\n", - scrq->pool_index); + rcu_read_lock(); + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + rcu_read_unlock(); } } diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index fa2d607a7b1b9..8f5cefb932dd1 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1006,11 +1006,14 @@ struct ibmvnic_adapter { struct work_struct ibmvnic_reset; struct delayed_work ibmvnic_delayed_reset; unsigned long resetting; - bool napi_enabled, from_passive_init; - bool login_pending; /* last device reset time */ unsigned long last_reset_time; + bool napi_enabled; + bool from_passive_init; + bool login_pending; + /* protected by rcu */ + bool tx_queues_active; bool failover_pending; bool force_reset_recovery; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 945b1bb9c6f40..e5e72b5bb6196 100644 
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -218,7 +218,6 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) ntu += nb_buffs; if (ntu == rx_ring->count) { rx_desc = I40E_RX_DESC(rx_ring, 0); - xdp = i40e_rx_bi(rx_ring, 0); ntu = 0; } @@ -241,21 +240,25 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; + net_prefetch(xdp->data_meta); + /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) goto out; - skb_reserve(skb, xdp->data - xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } out: xsk_buff_free(xdp); @@ -324,11 +327,11 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring, int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; - u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); u16 next_to_clean = rx_ring->next_to_clean; u16 count_mask = rx_ring->count - 1; unsigned int xdp_res, xdp_xmit = 0; bool failure = false; + u16 cleaned_count; while (likely(total_rx_packets < (unsigned int)budget)) { union i40e_rx_desc *rx_desc; diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index bea1d1e39fa27..2f60230d332a6 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -290,6 +290,7 @@ enum ice_pf_state { ICE_LINK_DEFAULT_OVERRIDE_PENDING, ICE_PHY_INIT_COMPLETE, ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */ + ICE_AUX_ERR_PENDING, ICE_STATE_NBITS /* must be last */ }; @@ -559,6 +560,7 @@ struct ice_pf { wait_queue_head_t reset_wait_queue; u32 hw_csum_rx_error; + u32 oicr_err_reg; u16 oicr_idx; /* Other interrupt cause MSIX vector index */ u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ u16 max_pf_txqs; /* Total Tx queues PF wide */ @@ -710,7 +712,7 @@ static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring) struct ice_vsi *vsi = ring->vsi; u16 qid; - qid = ring->q_index - vsi->num_xdp_txq; + qid = ring->q_index - vsi->alloc_txq; if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) return NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index fc3580167e7b5..5559230eff8b5 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -34,6 +34,9 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event) { struct iidc_auxiliary_drv *iadrv; + if (WARN_ON_ONCE(!in_task())) + return; + if (!pf->adev) return; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b7e8744b0c0a6..296f9d5f74084 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2255,6 +2255,19 @@ static void ice_service_task(struct work_struct 
*work) return; } + if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) { + struct iidc_event *event; + + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (event) { + set_bit(IIDC_EVENT_CRIT_ERR, event->type); + /* report the entire OICR value to AUX driver */ + swap(event->reg, pf->oicr_err_reg); + ice_send_event_to_aux(pf, event); + kfree(event); + } + } + if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) { /* Plug aux device per request */ ice_plug_aux_dev(pf); @@ -3041,17 +3054,9 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M) if (oicr & ICE_AUX_CRIT_ERR) { - struct iidc_event *event; - + pf->oicr_err_reg |= oicr; + set_bit(ICE_AUX_ERR_PENDING, pf->state); ena_mask &= ~ICE_AUX_CRIT_ERR; - event = kzalloc(sizeof(*event), GFP_ATOMIC); - if (event) { - set_bit(IIDC_EVENT_CRIT_ERR, event->type); - /* report the entire OICR value to AUX driver */ - event->reg = oicr; - ice_send_event_to_aux(pf, event); - kfree(event); - } } /* Report any remaining unexpected interrupts */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 2388837d6d6c9..feb874bde171f 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -428,20 +428,24 @@ static void ice_bump_ntc(struct ice_rx_ring *rx_ring) static struct sk_buff * ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { - unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start; + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard, + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data - xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } xsk_buff_free(xdp); return skb; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 51a2dcaf553de..2a5782063f4c8 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -965,10 +965,6 @@ static int igb_set_ringparam(struct net_device *netdev, memcpy(&temp_ring[i], adapter->rx_ring[i], sizeof(struct igb_ring)); - /* Clear copied XDP RX-queue info */ - memset(&temp_ring[i].xdp_rxq, 0, - sizeof(temp_ring[i].xdp_rxq)); - temp_ring[i].count = new_rx_count; err = igb_setup_rx_resources(&temp_ring[i]); if (err) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 38ba92022cd45..c1e4ad65b02de 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4352,7 +4352,18 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) { struct igb_adapter *adapter = netdev_priv(rx_ring->netdev); struct device *dev = rx_ring->dev; - int size; + int size, res; + + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + 
rx_ring->queue_index, 0); + if (res < 0) { + dev_err(dev, "Failed to register xdp_rxq index %u\n", + rx_ring->queue_index); + return res; + } size = sizeof(struct igb_rx_buffer) * rx_ring->count; @@ -4375,14 +4386,10 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) rx_ring->xdp_prog = adapter->xdp_prog; - /* XDP RX-queue info */ - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->queue_index, 0) < 0) - goto err; - return 0; err: + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2f17f36e94fde..2a9ae53238f73 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -505,6 +505,9 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) u8 index = rx_ring->queue_index; int size, desc_len, res; + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, rx_ring->q_vector->napi.napi_id); if (res < 0) { @@ -2446,19 +2449,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; - unsigned int totalsize = metasize + datasize; struct sk_buff *skb; - skb = __napi_alloc_skb(&ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); - memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize); + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + if (metasize) { skb_metadata_set(skb, metasize); __skb_pull(skb, metasize); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 6a5e9cf6b5dac..dd7ff66d422f0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -207,26 +207,28 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count) } static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *bi) + const struct xdp_buff *xdp) { - unsigned int metasize = bi->xdp->data - bi->xdp->data_meta; - unsigned int datasize = bi->xdp->data_end - bi->xdp->data; + unsigned int totalsize = xdp->data_end - xdp->data_meta; + unsigned int metasize = xdp->data - xdp->data_meta; struct sk_buff *skb; + net_prefetch(xdp->data_meta); + /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - bi->xdp->data_end - bi->xdp->data_hard_start, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, bi->xdp->data - bi->xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), bi->xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } - xsk_buff_free(bi->xdp); - bi->xdp 
= NULL; return skb; } @@ -317,12 +319,15 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, } /* XDP_PASS path */ - skb = ixgbe_construct_skb_zc(rx_ring, bi); + skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp); if (!skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; break; } + xsk_buff_free(bi->xdp); + bi->xdp = NULL; + cleaned_count++; ixgbe_inc_ntc(rx_ring); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 91f86d77cd41b..3a31fb8cc1554 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -605,7 +605,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, struct npc_install_flow_req req = { 0 }; struct npc_install_flow_rsp rsp = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; - struct nix_rx_action action; + struct nix_rx_action action = { 0 }; int blkaddr, index; /* AF's and SDP VFs work in promiscuous mode */ @@ -626,7 +626,6 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, index); } else { - *(u64 *)&action = 0x00; action.op = NIX_RX_ACTIONOP_UCAST; action.pf_func = pcifunc; } @@ -657,7 +656,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, struct npc_mcam *mcam = &rvu->hw->mcam; struct rvu_hwinfo *hw = rvu->hw; int blkaddr, ucast_idx, index; - struct nix_rx_action action; + struct nix_rx_action action = { 0 }; u64 relaxed_mask; if (!hw->cap.nix_rx_multicast && is_cgx_vf(rvu, pcifunc)) @@ -685,14 +684,14 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, blkaddr, ucast_idx); if (action.op != NIX_RX_ACTIONOP_RSS) { - *(u64 *)&action = 0x00; + *(u64 *)&action = 0; action.op = NIX_RX_ACTIONOP_UCAST; } /* RX_ACTION set to MCAST for CGX PF's */ if (hw->cap.nix_rx_multicast && pfvf->use_mce_list && is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) { - *(u64 *)&action = 0x00; + *(u64 *)&action = 0; action.op = NIX_RX_ACTIONOP_MCAST; pfvf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK); action.index = pfvf->promisc_mce_idx; @@ -832,7 +831,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, struct rvu_hwinfo *hw = rvu->hw; int blkaddr, ucast_idx, index; u8 mac_addr[ETH_ALEN] = { 0 }; - struct nix_rx_action action; + struct nix_rx_action action = { 0 }; struct rvu_pfvf *pfvf; u16 vf_func; @@ -861,14 +860,14 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, blkaddr, ucast_idx); if (action.op != NIX_RX_ACTIONOP_RSS) { - *(u64 *)&action = 0x00; + *(u64 *)&action = 0; action.op = NIX_RX_ACTIONOP_UCAST; action.pf_func = pcifunc; } /* RX_ACTION set to MCAST for CGX PF's */ if (hw->cap.nix_rx_multicast && pfvf->use_mce_list) { - *(u64 *)&action = 0x00; + *(u64 *)&action = 0; action.op = NIX_RX_ACTIONOP_MCAST; action.index = pfvf->mcast_mce_idx; } diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig index 7bdbb2d09a148..cc5e48e1bb4c3 100644 --- a/drivers/net/ethernet/microchip/sparx5/Kconfig +++ b/drivers/net/ethernet/microchip/sparx5/Kconfig @@ -4,6 +4,8 @@ config SPARX5_SWITCH depends on HAS_IOMEM depends on OF depends on ARCH_SPARX5 || COMPILE_TEST + depends on PTP_1588_CLOCK_OPTIONAL + depends on BRIDGE || BRIDGE=n select PHYLINK select PHY_SPARX5_SERDES select RESET_CONTROLLER diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c index 
7436f62fa1525..174ad95e746a3 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c @@ -420,6 +420,8 @@ static int sparx5_fdma_tx_alloc(struct sparx5 *sparx5) db_hw->dataptr = phys; db_hw->status = 0; db = devm_kzalloc(sparx5->dev, sizeof(*db), GFP_KERNEL); + if (!db) + return -ENOMEM; db->cpu_addr = cpu_addr; list_add_tail(&db->list, &tx->db_list); } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 7e296fa71b368..40fa5bce2ac2c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -331,6 +331,9 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_deregister_lifs; } + mod_timer(&ionic->watchdog_timer, + round_jiffies(jiffies + ionic->watchdog_period)); + return 0; err_out_deregister_lifs: @@ -348,7 +351,6 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_reset: ionic_reset(ionic); err_out_teardown: - del_timer_sync(&ionic->watchdog_timer); pci_clear_master(pdev); /* Don't fail the probe for these errors, keep * the hw interface around for inspection diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index d57e80d44c9df..2c7ce820a1fa7 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -122,9 +122,6 @@ int ionic_dev_setup(struct ionic *ionic) idev->fw_generation = IONIC_FW_STS_F_GENERATION & ioread8(&idev->dev_info_regs->fw_status); - mod_timer(&ionic->watchdog_timer, - round_jiffies(jiffies + ionic->watchdog_period)); - idev->db_pages = bar->vaddr; idev->phy_db_pages = bar->bus_addr; @@ -132,6 +129,16 @@ int ionic_dev_setup(struct ionic *ionic) } /* Devcmd Interface */ +bool ionic_is_fw_running(struct ionic_dev *idev) +{ + u8 fw_status = ioread8(&idev->dev_info_regs->fw_status); + + /* firmware is useful only if the running bit is set and + * fw_status != 0xff (bad PCI read) + */ + return (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING); +} + int ionic_heartbeat_check(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; @@ -155,13 +162,10 @@ int ionic_heartbeat_check(struct ionic *ionic) goto do_check_time; } - /* firmware is useful only if the running bit is set and - * fw_status != 0xff (bad PCI read) - * If fw_status is not ready don't bother with the generation. 
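
(Aside: the firmware-liveness test that this ionic hunk factors out into ionic_is_fw_running() is small enough to model in isolation. Below is a minimal, self-contained sketch of the predicate; read_fw_status() is a hypothetical stub standing in for ioread8() on dev_info_regs->fw_status, and only the 0xff sentinel and the running bit come from the patch.)

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define IONIC_FW_STS_F_RUNNING 0x01u /* running bit, per the patch */

    /* Hypothetical stand-in for ioread8(&idev->dev_info_regs->fw_status). */
    static uint8_t read_fw_status(void)
    {
        return 0x01; /* pretend firmware is up */
    }

    static bool fw_running(void)
    {
        uint8_t fw_status = read_fw_status();

        /* Firmware is useful only if the running bit is set and the
         * register did not read back as all-ones (a bad PCI read).
         */
        return fw_status != 0xff && (fw_status & IONIC_FW_STS_F_RUNNING);
    }

    int main(void)
    {
        printf("fw running: %d\n", fw_running());
        return 0;
    }
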
- */ fw_status = ioread8(&idev->dev_info_regs->fw_status); - if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) { + /* If fw_status is not ready don't bother with the generation */ + if (!ionic_is_fw_running(idev)) { fw_status_ready = false; } else { fw_generation = fw_status & IONIC_FW_STS_F_GENERATION; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index e5acf3bd62b2d..73b950ac12722 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -353,5 +353,6 @@ void ionic_q_rewind(struct ionic_queue *q, struct ionic_desc_info *start); void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info, unsigned int stop_index); int ionic_heartbeat_check(struct ionic *ionic); +bool ionic_is_fw_running(struct ionic_dev *idev); #endif /* _IONIC_DEV_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 875f4ec42efee..a0f9136b2d899 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -215,9 +215,13 @@ static void ionic_adminq_flush(struct ionic_lif *lif) void ionic_adminq_netdev_err_print(struct ionic_lif *lif, u8 opcode, u8 status, int err) { + const char *stat_str; + + stat_str = (err == -ETIMEDOUT) ? "TIMEOUT" : + ionic_error_to_str(status); + netdev_err(lif->netdev, "%s (%d) failed: %s (%d)\n", - ionic_opcode_to_str(opcode), opcode, - ionic_error_to_str(status), err); + ionic_opcode_to_str(opcode), opcode, stat_str, err); } static int ionic_adminq_check_err(struct ionic_lif *lif, @@ -318,6 +322,7 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, if (do_msg && !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) netdev_err(netdev, "Posting of %s (%d) failed: %d\n", name, ctx->cmd.cmd.opcode, err); + ctx->comp.comp.status = IONIC_RC_ERROR; return err; } @@ -336,6 +341,7 @@ int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, if (do_msg) netdev_err(netdev, "%s (%d) interrupted, FW in reset\n", name, ctx->cmd.cmd.opcode); + ctx->comp.comp.status = IONIC_RC_ERROR; return -ENXIO; } @@ -370,10 +376,10 @@ int ionic_adminq_post_wait_nomsg(struct ionic_lif *lif, struct ionic_admin_ctx * static void ionic_dev_cmd_clean(struct ionic *ionic) { - union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs; + struct ionic_dev *idev = &ionic->idev; - iowrite32(0, ®s->doorbell); - memset_io(®s->cmd, 0, sizeof(regs->cmd)); + iowrite32(0, &idev->dev_cmd_regs->doorbell); + memset_io(&idev->dev_cmd_regs->cmd, 0, sizeof(idev->dev_cmd_regs->cmd)); } int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) @@ -540,6 +546,9 @@ int ionic_reset(struct ionic *ionic) struct ionic_dev *idev = &ionic->idev; int err; + if (!ionic_is_fw_running(idev)) + return 0; + mutex_lock(&ionic->dev_cmd_lock); ionic_dev_cmd_reset(idev); err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); @@ -612,15 +621,17 @@ int ionic_port_init(struct ionic *ionic) int ionic_port_reset(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; - int err; + int err = 0; if (!idev->port_info) return 0; - mutex_lock(&ionic->dev_cmd_lock); - ionic_dev_cmd_port_reset(idev); - err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); - mutex_unlock(&ionic->dev_cmd_lock); + if (ionic_is_fw_running(idev)) { + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_port_reset(idev); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + 
mutex_unlock(&ionic->dev_cmd_lock); + } dma_free_coherent(ionic->dev, idev->port_info_sz, idev->port_info, idev->port_info_pa); @@ -628,9 +639,6 @@ int ionic_port_reset(struct ionic *ionic) idev->port_info = NULL; idev->port_info_pa = 0; - if (err) - dev_err(ionic->dev, "Failed to reset port\n"); - return err; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 48cf4355bc47a..0848b5529d48a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -2984,12 +2984,16 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn, u8 mask = QED_ACCEPT_UCAST_UNMATCHED | QED_ACCEPT_MCAST_UNMATCHED; struct qed_filter_accept_flags *flags = ¶ms->accept_flags; struct qed_public_vf_info *vf_info; + u16 tlv_mask; + + tlv_mask = BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM) | + BIT(QED_IOV_VP_UPDATE_ACCEPT_ANY_VLAN); /* Untrusted VFs can't even be trusted to know that fact. * Simply indicate everything is configured fine, and trace * configuration 'behind their back'. */ - if (!(*tlvs & BIT(QED_IOV_VP_UPDATE_ACCEPT_PARAM))) + if (!(*tlvs & tlv_mask)) return 0; vf_info = qed_iov_get_public_vf_info(hwfn, vfid, true); @@ -3006,6 +3010,13 @@ static int qed_iov_pre_update_vport(struct qed_hwfn *hwfn, flags->tx_accept_filter &= ~mask; } + if (params->update_accept_any_vlan_flg) { + vf_info->accept_any_vlan = params->accept_any_vlan; + + if (vf_info->forced_vlan && !vf_info->is_trusted_configured) + params->accept_any_vlan = false; + } + return 0; } @@ -4719,6 +4730,7 @@ static int qed_get_vf_config(struct qed_dev *cdev, tx_rate = vf_info->tx_rate; ivi->max_tx_rate = tx_rate ? tx_rate : link.speed; ivi->min_tx_rate = qed_iov_get_vf_min_rate(hwfn, vf_id); + ivi->trusted = vf_info->is_trusted_request; return 0; } @@ -5149,6 +5161,12 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) params.update_ctl_frame_check = 1; params.mac_chk_en = !vf_info->is_trusted_configured; + params.update_accept_any_vlan_flg = 0; + + if (vf_info->accept_any_vlan && vf_info->forced_vlan) { + params.update_accept_any_vlan_flg = 1; + params.accept_any_vlan = vf_info->accept_any_vlan; + } if (vf_info->rx_accept_mode & mask) { flags->update_rx_mode_config = 1; @@ -5164,13 +5182,20 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) if (!vf_info->is_trusted_configured) { flags->rx_accept_filter &= ~mask; flags->tx_accept_filter &= ~mask; + params.accept_any_vlan = false; } if (flags->update_rx_mode_config || flags->update_tx_mode_config || - params.update_ctl_frame_check) + params.update_ctl_frame_check || + params.update_accept_any_vlan_flg) { + DP_VERBOSE(hwfn, QED_MSG_IOV, + "vport update config for %s VF[abs 0x%x rel 0x%x]\n", + vf_info->is_trusted_configured ? 
"trusted" : "untrusted", + vf->abs_vf_id, vf->relative_vf_id); qed_sp_vport_update(hwfn, ¶ms, QED_SPQ_MODE_EBLOCK, NULL); + } } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index f448e3dd6c8ba..6ee2493de1642 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -62,6 +62,7 @@ struct qed_public_vf_info { bool is_trusted_request; u8 rx_accept_mode; u8 tx_accept_mode; + bool accept_any_vlan; }; struct qed_iov_vf_init_params { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h index 5d79ee4370bcd..7519773eaca6e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h @@ -51,7 +51,7 @@ static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb) if (dcb && dcb->ops->get_hw_capability) return dcb->ops->get_hw_capability(dcb); - return 0; + return -EOPNOTSUPP; } static inline void qlcnic_dcb_free(struct qlcnic_dcb *dcb) @@ -65,7 +65,7 @@ static inline int qlcnic_dcb_attach(struct qlcnic_dcb *dcb) if (dcb && dcb->ops->attach) return dcb->ops->attach(dcb); - return 0; + return -EOPNOTSUPP; } static inline int @@ -74,7 +74,7 @@ qlcnic_dcb_query_hw_capability(struct qlcnic_dcb *dcb, char *buf) if (dcb && dcb->ops->query_hw_capability) return dcb->ops->query_hw_capability(dcb, buf); - return 0; + return -EOPNOTSUPP; } static inline void qlcnic_dcb_get_info(struct qlcnic_dcb *dcb) @@ -89,7 +89,7 @@ qlcnic_dcb_query_cee_param(struct qlcnic_dcb *dcb, char *buf, u8 type) if (dcb && dcb->ops->query_cee_param) return dcb->ops->query_cee_param(dcb, buf, type); - return 0; + return -EOPNOTSUPP; } static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_dcb *dcb) @@ -97,7 +97,7 @@ static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_dcb *dcb) if (dcb && dcb->ops->get_cee_cfg) return dcb->ops->get_cee_cfg(dcb); - return 0; + return -EOPNOTSUPP; } static inline void qlcnic_dcb_aen_handler(struct qlcnic_dcb *dcb, void *msg) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 2ffa0a11eea56..569683f33804c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -460,6 +460,13 @@ static int ethqos_clks_config(void *priv, bool enabled) dev_err(ðqos->pdev->dev, "rgmii_clk enable failed\n"); return ret; } + + /* Enable functional clock to prevent DMA reset to timeout due + * to lacking PHY clock after the hardware block has been power + * cycled. The actual configuration will be adjusted once + * ethqos_fix_mac_speed() is invoked. 
+ */ + ethqos_set_func_clk_en(ethqos); } else { clk_disable_unprepare(ethqos->rgmii_clk); } diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index aa42141be3c0e..a557a477d0393 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -364,11 +364,9 @@ int cpsw_ethtool_op_begin(struct net_device *ndev) struct cpsw_common *cpsw = priv->cpsw; int ret; - ret = pm_runtime_get_sync(cpsw->dev); - if (ret < 0) { + ret = pm_runtime_resume_and_get(cpsw->dev); + if (ret < 0) cpsw_err(priv, drv, "ethtool begin failed %d\n", ret); - pm_runtime_put_noidle(cpsw->dev); - } return ret; } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 377c94ec24869..90d96eb79984e 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -857,46 +857,53 @@ static void axienet_recv(struct net_device *ndev) while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) { dma_addr_t phys; - tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; - /* Ensure we see complete descriptor update */ dma_rmb(); - phys = desc_get_phys_addr(lp, cur_p); - dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size, - DMA_FROM_DEVICE); skb = cur_p->skb; cur_p->skb = NULL; - length = cur_p->app4 & 0x0000FFFF; - - skb_put(skb, length); - skb->protocol = eth_type_trans(skb, ndev); - /*skb_checksum_none_assert(skb);*/ - skb->ip_summed = CHECKSUM_NONE; - - /* if we're doing Rx csum offload, set it up */ - if (lp->features & XAE_FEATURE_FULL_RX_CSUM) { - csumstatus = (cur_p->app2 & - XAE_FULL_CSUM_STATUS_MASK) >> 3; - if ((csumstatus == XAE_IP_TCP_CSUM_VALIDATED) || - (csumstatus == XAE_IP_UDP_CSUM_VALIDATED)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* skb could be NULL if a previous pass already received the + * packet for this slot in the ring, but failed to refill it + * with a newly allocated buffer. In this case, don't try to + * receive it again. 
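
(The slot-skipping rule that comment states can be shown in a few lines of plain C. This is an illustrative model, not the driver's ring code: a slot whose refill failed keeps a NULL buffer and is passed over on the next pass, and the tail index only advances across slots that were successfully refilled.)

    #include <stdio.h>
    #include <stdlib.h>

    #define RING_SIZE 4

    struct slot { void *buf; };

    /* One RX pass: deliver filled slots, refill, and advance the tail
     * only over slots that were refilled successfully.
     */
    static int rx_pass(struct slot *ring, int fail_at)
    {
        int tail = -1;

        for (int i = 0; i < RING_SIZE; i++) {
            if (ring[i].buf) {          /* deliver the packet */
                free(ring[i].buf);
                ring[i].buf = NULL;
            }
            ring[i].buf = (i == fail_at) ? NULL : malloc(64);
            if (!ring[i].buf)
                break;                  /* leave slot empty; retry later */
            tail = i;                   /* tail covers refilled slots only */
        }
        return tail;
    }

    int main(void)
    {
        struct slot ring[RING_SIZE] = { { NULL } };

        printf("pass 1 tail: %d\n", rx_pass(ring, 2));  /* slot 2 refill fails -> 1 */
        printf("pass 2 tail: %d\n", rx_pass(ring, -1)); /* recovers -> 3 */
        for (int i = 0; i < RING_SIZE; i++)
            free(ring[i].buf);
        return 0;
    }
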
+ */ + if (likely(skb)) { + length = cur_p->app4 & 0x0000FFFF; + + phys = desc_get_phys_addr(lp, cur_p); + dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size, + DMA_FROM_DEVICE); + + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, ndev); + /*skb_checksum_none_assert(skb);*/ + skb->ip_summed = CHECKSUM_NONE; + + /* if we're doing Rx csum offload, set it up */ + if (lp->features & XAE_FEATURE_FULL_RX_CSUM) { + csumstatus = (cur_p->app2 & + XAE_FULL_CSUM_STATUS_MASK) >> 3; + if (csumstatus == XAE_IP_TCP_CSUM_VALIDATED || + csumstatus == XAE_IP_UDP_CSUM_VALIDATED) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 && + skb->protocol == htons(ETH_P_IP) && + skb->len > 64) { + skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF); + skb->ip_summed = CHECKSUM_COMPLETE; } - } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 && - skb->protocol == htons(ETH_P_IP) && - skb->len > 64) { - skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF); - skb->ip_summed = CHECKSUM_COMPLETE; - } - netif_rx(skb); + netif_rx(skb); - size += length; - packets++; + size += length; + packets++; + } new_skb = netdev_alloc_skb_ip_align(ndev, lp->max_frm_size); if (!new_skb) - return; + break; phys = dma_map_single(ndev->dev.parent, new_skb->data, lp->max_frm_size, @@ -905,7 +912,7 @@ static void axienet_recv(struct net_device *ndev) if (net_ratelimit()) netdev_err(ndev, "RX DMA mapping error\n"); dev_kfree_skb(new_skb); - return; + break; } desc_set_phys_addr(lp, phys, cur_p); @@ -913,6 +920,11 @@ static void axienet_recv(struct net_device *ndev) cur_p->status = 0; cur_p->skb = new_skb; + /* Only update tail_p to mark this slot as usable after it has + * been successfully refilled. + */ + tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; + if (++lp->rx_bd_ci >= lp->rx_bd_num) lp->rx_bd_ci = 0; cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index afa81a9480ccd..e675d1016c3c8 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -154,19 +154,15 @@ static void free_netvsc_device(struct rcu_head *head) kfree(nvdev->extension); - if (nvdev->recv_original_buf) { - hv_unmap_memory(nvdev->recv_buf); + if (nvdev->recv_original_buf) vfree(nvdev->recv_original_buf); - } else { + else vfree(nvdev->recv_buf); - } - if (nvdev->send_original_buf) { - hv_unmap_memory(nvdev->send_buf); + if (nvdev->send_original_buf) vfree(nvdev->send_original_buf); - } else { + else vfree(nvdev->send_buf); - } bitmap_free(nvdev->send_section_map); @@ -765,6 +761,12 @@ void netvsc_device_remove(struct hv_device *device) netvsc_teardown_send_gpadl(device, net_device, ndev); } + if (net_device->recv_original_buf) + hv_unmap_memory(net_device->recv_buf); + + if (net_device->send_original_buf) + hv_unmap_memory(net_device->send_buf); + /* Release all resources */ free_netvsc_device_rcu(net_device); } @@ -1821,6 +1823,12 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, netif_napi_del(&net_device->chan_table[0].napi); cleanup2: + if (net_device->recv_original_buf) + hv_unmap_memory(net_device->recv_buf); + + if (net_device->send_original_buf) + hv_unmap_memory(net_device->send_buf); + free_netvsc_device(&net_device->rcu); return ERR_PTR(ret); diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 29aa811af430f..a8794065b250b 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -784,25 +784,7 @@ static int at803x_probe(struct phy_device 
*phydev) return ret; } - /* Some bootloaders leave the fiber page selected. - * Switch to the copper page, as otherwise we read - * the PHY capabilities from the fiber side. - */ - if (phydev->drv->phy_id == ATH8031_PHY_ID) { - phy_lock_mdio_bus(phydev); - ret = at803x_write_page(phydev, AT803X_PAGE_COPPER); - phy_unlock_mdio_bus(phydev); - if (ret) - goto err; - } - return 0; - -err: - if (priv->vddio) - regulator_disable(priv->vddio); - - return ret; } static void at803x_remove(struct phy_device *phydev) @@ -912,6 +894,22 @@ static int at803x_config_init(struct phy_device *phydev) { int ret; + if (phydev->drv->phy_id == ATH8031_PHY_ID) { + /* Some bootloaders leave the fiber page selected. + * Switch to the copper page, as otherwise we read + * the PHY capabilities from the fiber side. + */ + phy_lock_mdio_bus(phydev); + ret = at803x_write_page(phydev, AT803X_PAGE_COPPER); + phy_unlock_mdio_bus(phydev); + if (ret) + return ret; + + ret = at8031_pll_config(phydev); + if (ret < 0) + return ret; + } + /* The RX and TX delay default is: * after HW reset: RX delay enabled and TX delay disabled * after SW reset: RX delay enabled, while TX delay retains the @@ -941,12 +939,6 @@ static int at803x_config_init(struct phy_device *phydev) if (ret < 0) return ret; - if (phydev->drv->phy_id == ATH8031_PHY_ID) { - ret = at8031_pll_config(phydev); - if (ret < 0) - return ret; - } - /* Ar803x extended next page bit is enabled by default. Cisco * multigig switches read this bit and attempt to negotiate 10Gbps * rates even if the next page bit is disabled. This is incorrect diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 3c683e0e40e9e..e36809aa6d300 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -11,6 +11,7 @@ */ #include "bcm-phy-lib.h" +#include #include #include #include @@ -602,6 +603,26 @@ static int brcm_fet_config_init(struct phy_device *phydev) if (err < 0) return err; + /* The datasheet indicates the PHY needs up to 1us to complete a reset, + * build some slack here. + */ + usleep_range(1000, 2000); + + /* The PHY requires 65 MDC clock cycles to complete a write operation + * and turnaround the line properly. + * + * We ignore -EIO here as the MDIO controller (e.g.: mdio-bcm-unimac) + * may flag the lack of turn-around as a read failure. This is + * particularly true with this combination since the MDIO controller + * only used 64 MDC cycles. This is not a critical failure in this + * specific case and it has no functional impact otherwise, so we let + * that one go through. If there is a genuine bus error, the next read + * of MII_BRCM_FET_INTREG will error out. 
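
(The tolerance for -EIO that the comment above argues for reduces to a two-line error filter. A self-contained sketch follows, with a hypothetical mdio_read() stub in place of phy_read(phydev, MII_BMCR); only the filtering logic mirrors the patch.)

    #include <errno.h>
    #include <stdio.h>

    /* Hypothetical MDIO read: returns a register value >= 0, or -errno. */
    static int mdio_read(int reg)
    {
        (void)reg;
        return -EIO; /* simulate a controller flagging the short turnaround */
    }

    static int post_reset_flush(void)
    {
        int err = mdio_read(0 /* BMCR */);

        /* A missing turnaround may surface as -EIO on some controllers;
         * only treat other negative codes as genuine bus errors.
         */
        if (err < 0 && err != -EIO)
            return err;
        return 0;
    }

    int main(void)
    {
        printf("post_reset_flush() = %d\n", post_reset_flush());
        return 0;
    }
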
+ */ + err = phy_read(phydev, MII_BMCR); + if (err < 0 && err != -EIO) + return err; + reg = phy_read(phydev, MII_BRCM_FET_INTREG); if (reg < 0) return reg; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index a7ebcdab415b5..281cebc3d00cc 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1596,11 +1596,13 @@ static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr) { u32 data; - phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); - phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); - phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, - (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC)); - data = phy_read(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA); + phy_lock_mdio_bus(phydev); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, + (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC)); + data = __phy_read(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA); + phy_unlock_mdio_bus(phydev); return data; } @@ -1608,18 +1610,18 @@ static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr) static int lanphy_write_page_reg(struct phy_device *phydev, int page, u16 addr, u16 val) { - phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); - phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); - phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, - (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC)); + phy_lock_mdio_bus(phydev); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr); + __phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, + page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC); - val = phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, val); - if (val) { + val = __phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, val); + if (val != 0) phydev_err(phydev, "Error: phy_write has returned error %d\n", val); - return val; - } - return 0; + phy_unlock_mdio_bus(phydev); + return val; } static int lan8814_config_init(struct phy_device *phydev) diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index 2a1e31defe718..4334aafab59a4 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -192,8 +192,8 @@ extern const struct driver_info ax88172a_info; /* ASIX specific flags */ #define FLAG_EEPROM_MAC (1UL << 0) /* init device MAC from eeprom */ -int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data, int in_pm); +int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data, int in_pm); int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data, int in_pm); diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 71682970be584..524805285019a 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -11,8 +11,8 @@ #define AX_HOST_EN_RETRIES 30 -int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data, int in_pm) +int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, + u16 size, void *data, int in_pm) { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); @@ -27,9 +27,12 @@ int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); - if (unlikely(ret < 0)) + if (unlikely(ret < size)) { + ret = ret < 0 ? 
ret : -ENODATA; + netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n", index, ret); + } return ret; } @@ -79,7 +82,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm) 0, 0, 1, &smsr, in_pm); if (ret == -ENODEV) break; - else if (ret < sizeof(smsr)) + else if (ret < 0) continue; else if (smsr & AX_HOST_EN) break; @@ -579,8 +582,12 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) return ret; } - asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, - (__u16)loc, 2, &res, 1); + ret = asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, + (__u16)loc, 2, &res, 1); + if (ret < 0) { + mutex_unlock(&dev->phy_mutex); + return ret; + } asix_set_hw_mii(dev, 1); mutex_unlock(&dev->phy_mutex); diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 4514d35ef4c48..6b2fbdf4e0fde 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -755,7 +755,12 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) priv->phy_addr = ret; priv->embd_phy = ((priv->phy_addr & 0x1f) == 0x10); - asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read STATMNGSTS_REG: %d\n", ret); + return ret; + } + chipcode &= AX_CHIPCODE_MASK; ret = (chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) : @@ -920,11 +925,21 @@ static int ax88178_reset(struct usbnet *dev) int gpio0 = 0; u32 phyid; - asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0); + ret = asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read GPIOS: %d\n", ret); + return ret; + } + netdev_dbg(dev->net, "GPIO Status: 0x%04x\n", status); asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL, 0); - asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0); + ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0); + if (ret < 0) { + netdev_dbg(dev->net, "Failed to read EEPROM: %d\n", ret); + return ret; + } + asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL, 0); netdev_dbg(dev->net, "EEPROM index 0x17 is 0x%04x\n", eeprom); diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c index 1de413b19e342..8084e7408c0ae 100644 --- a/drivers/net/wireguard/queueing.c +++ b/drivers/net/wireguard/queueing.c @@ -4,6 +4,7 @@ */ #include "queueing.h" +#include struct multicore_worker __percpu * wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) @@ -42,7 +43,7 @@ void wg_packet_queue_free(struct crypt_queue *queue, bool purge) { free_percpu(queue->worker); WARN_ON(!purge && !__ptr_ring_empty(&queue->ring)); - ptr_ring_cleanup(&queue->ring, purge ? (void(*)(void*))kfree_skb : NULL); + ptr_ring_cleanup(&queue->ring, purge ? 
__skb_array_destroy_skb : NULL); } #define NEXT(skb) ((skb)->prev) diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index 6f07b949cb81d..0414d7a6ce741 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -160,6 +160,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, rcu_read_unlock_bh(); return ret; #else + kfree_skb(skb); return -EAFNOSUPPORT; #endif } @@ -241,7 +242,7 @@ int wg_socket_endpoint_from_skb(struct endpoint *endpoint, endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; endpoint->src4.s_addr = ip_hdr(skb)->daddr; endpoint->src_if4 = skb->skb_iif; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { endpoint->addr6.sin6_family = AF_INET6; endpoint->addr6.sin6_port = udp_hdr(skb)->source; endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; @@ -284,7 +285,7 @@ void wg_socket_set_peer_endpoint(struct wg_peer *peer, peer->endpoint.addr4 = endpoint->addr4; peer->endpoint.src4 = endpoint->src4; peer->endpoint.src_if4 = endpoint->src_if4; - } else if (endpoint->addr.sa_family == AF_INET6) { + } else if (IS_ENABLED(CONFIG_IPV6) && endpoint->addr.sa_family == AF_INET6) { peer->endpoint.addr6 = endpoint->addr6; peer->endpoint.src6 = endpoint->src6; } else { diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 9513ab696fff1..f79dd9a716906 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -1556,11 +1556,11 @@ static int ath10k_setup_msa_resources(struct ath10k *ar, u32 msa_size) node = of_parse_phandle(dev->of_node, "memory-region", 0); if (node) { ret = of_address_to_resource(node, 0, &r); + of_node_put(node); if (ret) { dev_err(dev, "failed to resolve msa fixed region\n"); return ret; } - of_node_put(node); ar->msa.paddr = r.start; ar->msa.mem_size = resource_size(&r); diff --git a/drivers/net/wireless/ath/ath10k/wow.c b/drivers/net/wireless/ath/ath10k/wow.c index 7d65c115669fe..20b9aa8ddf7d5 100644 --- a/drivers/net/wireless/ath/ath10k/wow.c +++ b/drivers/net/wireless/ath/ath10k/wow.c @@ -337,14 +337,15 @@ static int ath10k_vif_wow_set_wakeups(struct ath10k_vif *arvif, if (patterns[i].mask[j / 8] & BIT(j % 8)) bitmask[j] = 0xff; old_pattern.mask = bitmask; - new_pattern = old_pattern; if (ar->wmi.rx_decap_mode == ATH10K_HW_TXRX_NATIVE_WIFI) { - if (patterns[i].pkt_offset < ETH_HLEN) + if (patterns[i].pkt_offset < ETH_HLEN) { ath10k_wow_convert_8023_to_80211(&new_pattern, &old_pattern); - else + } else { + new_pattern = old_pattern; new_pattern.pkt_offset += WOW_HDR_LEN - ETH_HLEN; + } } if (WARN_ON(new_pattern.pattern_len > WOW_MAX_PATTERN_SIZE)) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index c212a789421ee..e432f8dc05d61 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -2642,9 +2642,9 @@ int ath11k_dp_process_rx(struct ath11k_base *ab, int ring_id, spin_lock_bh(&srng->lock); +try_again: ath11k_hal_srng_access_begin(ab, srng); -try_again: while (likely(desc = (struct hal_reo_dest_ring *)ath11k_hal_srng_dst_get_next_entry(ab, srng))) { diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c index 91d6244b65435..8402961c66887 100644 --- a/drivers/net/wireless/ath/ath11k/dp_tx.c +++ b/drivers/net/wireless/ath/ath11k/dp_tx.c @@ -426,7 +426,7 @@ void ath11k_dp_tx_update_txcompl(struct ath11k *ar, struct 
hal_tx_status *ts) struct ath11k_sta *arsta; struct ieee80211_sta *sta; u16 rate, ru_tones; - u8 mcs, rate_idx, ofdma; + u8 mcs, rate_idx = 0, ofdma; int ret; spin_lock_bh(&ab->base_lock); diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 07f499d5ec92b..08e33778f63b8 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2319,6 +2319,9 @@ static void ath11k_peer_assoc_h_he_6ghz(struct ath11k *ar, if (!arg->he_flag || band != NL80211_BAND_6GHZ || !sta->he_6ghz_capa.capa) return; + if (sta->bandwidth == IEEE80211_STA_RX_BW_40) + arg->bw_40 = true; + if (sta->bandwidth == IEEE80211_STA_RX_BW_80) arg->bw_80 = true; @@ -4504,24 +4507,30 @@ static int ath11k_mac_op_sta_state(struct ieee80211_hw *hw, sta->addr, arvif->vdev_id); } else if ((old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_NOTEXIST)) { - ath11k_dp_peer_cleanup(ar, arvif->vdev_id, sta->addr); + bool skip_peer_delete = ar->ab->hw_params.vdev_start_delay && + vif->type == NL80211_IFTYPE_STATION; - if (ar->ab->hw_params.vdev_start_delay && - vif->type == NL80211_IFTYPE_STATION) - goto free; + ath11k_dp_peer_cleanup(ar, arvif->vdev_id, sta->addr); - ret = ath11k_peer_delete(ar, arvif->vdev_id, sta->addr); - if (ret) - ath11k_warn(ar->ab, "Failed to delete peer: %pM for VDEV: %d\n", - sta->addr, arvif->vdev_id); - else - ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "Removed peer: %pM for VDEV: %d\n", - sta->addr, arvif->vdev_id); + if (!skip_peer_delete) { + ret = ath11k_peer_delete(ar, arvif->vdev_id, sta->addr); + if (ret) + ath11k_warn(ar->ab, + "Failed to delete peer: %pM for VDEV: %d\n", + sta->addr, arvif->vdev_id); + else + ath11k_dbg(ar->ab, + ATH11K_DBG_MAC, + "Removed peer: %pM for VDEV: %d\n", + sta->addr, arvif->vdev_id); + } ath11k_mac_dec_num_stations(arvif, sta); spin_lock_bh(&ar->ab->base_lock); peer = ath11k_peer_find(ar->ab, arvif->vdev_id, sta->addr); - if (peer && peer->sta == sta) { + if (skip_peer_delete && peer) { + peer->sta = NULL; + } else if (peer && peer->sta == sta) { ath11k_warn(ar->ab, "Found peer entry %pM n vdev %i after it was supposedly removed\n", vif->addr, arvif->vdev_id); peer->sta = NULL; @@ -4531,7 +4540,6 @@ static int ath11k_mac_op_sta_state(struct ieee80211_hw *hw, } spin_unlock_bh(&ar->ab->base_lock); -free: kfree(arsta->tx_stats); arsta->tx_stats = NULL; diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index e4250ba8dfee2..cccaa348cf212 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -332,6 +332,7 @@ static int ath11k_mhi_read_addr_from_dt(struct mhi_controller *mhi_ctrl) return -ENOENT; ret = of_address_to_resource(np, 0, &res); + of_node_put(np); if (ret) return ret; diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index 65d3c6ba35ae6..d0701e8eca9c0 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ b/drivers/net/wireless/ath/ath11k/qmi.c @@ -1932,10 +1932,11 @@ static int ath11k_qmi_assign_target_mem_chunk(struct ath11k_base *ab) if (!hremote_node) { ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi fail to get hremote_node\n"); - return ret; + return -ENODEV; } ret = of_address_to_resource(hremote_node, 0, &res); + of_node_put(hremote_node); if (ret) { ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi fail to get reg from hremote\n"); diff --git a/drivers/net/wireless/ath/ath9k/htc_hst.c b/drivers/net/wireless/ath/ath9k/htc_hst.c index 510e61e97dbcb..994ec48b2f669 100644 --- 
a/drivers/net/wireless/ath/ath9k/htc_hst.c +++ b/drivers/net/wireless/ath/ath9k/htc_hst.c @@ -30,6 +30,7 @@ static int htc_issue_send(struct htc_target *target, struct sk_buff* skb, hdr->endpoint_id = epid; hdr->flags = flags; hdr->payload_len = cpu_to_be16(len); + memset(hdr->control, 0, sizeof(hdr->control)); status = target->hif->send(target->hif_dev, endpoint->ul_pipeid, skb); @@ -272,6 +273,10 @@ int htc_connect_service(struct htc_target *target, conn_msg->dl_pipeid = endpoint->dl_pipeid; conn_msg->ul_pipeid = endpoint->ul_pipeid; + /* To prevent infoleak */ + conn_msg->svc_meta_len = 0; + conn_msg->pad = 0; + ret = htc_issue_send(target, skb, skb->len, 0, ENDPOINT0); if (ret) goto err; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 49f7ee1c912b8..2208ec8004821 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1914,7 +1914,7 @@ static int carl9170_parse_eeprom(struct ar9170 *ar) WARN_ON(!(tx_streams >= 1 && tx_streams <= IEEE80211_HT_MCS_TX_MAX_STREAMS)); - tx_params = (tx_streams - 1) << + tx_params |= (tx_streams - 1) << IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT; carl9170_band_2GHz.ht_cap.mcs.tx_params |= tx_params; diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c index b2400e2417a55..f15e7bd690b5b 100644 --- a/drivers/net/wireless/ath/regd.c +++ b/drivers/net/wireless/ath/regd.c @@ -667,14 +667,14 @@ ath_regd_init_wiphy(struct ath_regulatory *reg, /* * Some users have reported their EEPROM programmed with - * 0x8000 or 0x0 set, this is not a supported regulatory - * domain but since we have more than one user with it we - * need a solution for them. We default to 0x64, which is - * the default Atheros world regulatory domain. + * 0x8000 set, this is not a supported regulatory domain + * but since we have more than one user with it we need + * a solution for them. We default to 0x64, which is the + * default Atheros world regulatory domain. 
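
(The behavioural change in ath_regd_sanitize() is easy to check in isolation: after the patch only 0x8000 is rewritten to the 0x64 world domain, while a zero regdomain is now left untouched. A standalone sketch — COUNTRY_ERD_FLAG is copied from the driver, the rest is illustrative:)

    #include <stdint.h>
    #include <stdio.h>

    #define COUNTRY_ERD_FLAG 0x8000u /* as in ath/regd.h */

    /* 0x64 is the default Atheros world regulatory domain. */
    static uint16_t sanitize_rd(uint16_t rd)
    {
        if (rd != COUNTRY_ERD_FLAG)
            return rd;
        fprintf(stderr, "ath: EEPROM regdomain sanitized\n");
        return 0x64;
    }

    int main(void)
    {
        printf("0x8000 -> 0x%x\n", sanitize_rd(0x8000)); /* rewritten */
        printf("0x0    -> 0x%x\n", sanitize_rd(0x0));    /* kept as-is */
        return 0;
    }
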
*/ static void ath_regd_sanitize(struct ath_regulatory *reg) { - if (reg->current_rd != COUNTRY_ERD_FLAG && reg->current_rd != 0) + if (reg->current_rd != COUNTRY_ERD_FLAG) return; printk(KERN_DEBUG "ath: EEPROM regdomain sanitized\n"); reg->current_rd = 0x64; diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 9575d7373bf27..ac2813ed851c4 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -1513,6 +1513,9 @@ static int wcn36xx_platform_get_resources(struct wcn36xx *wcn, if (iris_node) { if (of_device_is_compatible(iris_node, "qcom,wcn3620")) wcn->rf_id = RF_IRIS_WCN3620; + if (of_device_is_compatible(iris_node, "qcom,wcn3660") || + of_device_is_compatible(iris_node, "qcom,wcn3660b")) + wcn->rf_id = RF_IRIS_WCN3660; if (of_device_is_compatible(iris_node, "qcom,wcn3680")) wcn->rf_id = RF_IRIS_WCN3680; of_node_put(iris_node); diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h index fbd0558c2c196..5d3f8f56e5681 100644 --- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h +++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h @@ -97,6 +97,7 @@ enum wcn36xx_ampdu_state { #define RF_UNKNOWN 0x0000 #define RF_IRIS_WCN3620 0x3620 +#define RF_IRIS_WCN3660 0x3660 #define RF_IRIS_WCN3680 0x3680 static inline void buff_to_be(u32 *buf, size_t len) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c index d99140960a820..dcbe55b56e437 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c @@ -207,6 +207,8 @@ static int brcmf_init_nvram_parser(struct nvram_parser *nvp, size = BRCMF_FW_MAX_NVRAM_SIZE; else size = data_len; + /* Add space for properties we may add */ + size += strlen(BRCMF_FW_DEFAULT_BOARDREV) + 1; /* Alloc for extra 0 byte + roundup by 4 + length field */ size += 1 + 3 + sizeof(u32); nvp->nvram = kzalloc(size, GFP_KERNEL); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index 8b149996fc000..3ff4997e1c97a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -59,6 +60,13 @@ BRCMF_FW_DEF(4366B, "brcmfmac4366b-pcie"); BRCMF_FW_DEF(4366C, "brcmfmac4366c-pcie"); BRCMF_FW_DEF(4371, "brcmfmac4371-pcie"); +/* firmware config files */ +MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.txt"); +MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txt"); + +/* per-board firmware binaries */ +MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.bin"); + static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = { BRCMF_FW_ENTRY(BRCM_CC_43602_CHIP_ID, 0xFFFFFFFF, 43602), BRCMF_FW_ENTRY(BRCM_CC_43465_CHIP_ID, 0xFFFFFFF0, 4366C), @@ -447,47 +455,6 @@ brcmf_pcie_write_ram32(struct brcmf_pciedev_info *devinfo, u32 mem_offset, } -static void -brcmf_pcie_copy_mem_todev(struct brcmf_pciedev_info *devinfo, u32 mem_offset, - void *srcaddr, u32 len) -{ - void __iomem *address = devinfo->tcm + mem_offset; - __le32 *src32; - __le16 *src16; - u8 *src8; - - if (((ulong)address & 4) || ((ulong)srcaddr & 4) || (len & 4)) { - if (((ulong)address & 2) || ((ulong)srcaddr & 2) || (len & 2)) { - src8 = (u8 *)srcaddr; - while (len) { - iowrite8(*src8, address); - address++; - 
src8++; - len--; - } - } else { - len = len / 2; - src16 = (__le16 *)srcaddr; - while (len) { - iowrite16(le16_to_cpu(*src16), address); - address += 2; - src16++; - len--; - } - } - } else { - len = len / 4; - src32 = (__le32 *)srcaddr; - while (len) { - iowrite32(le32_to_cpu(*src32), address); - address += 4; - src32++; - len--; - } - } -} - - static void brcmf_pcie_copy_dev_tomem(struct brcmf_pciedev_info *devinfo, u32 mem_offset, void *dstaddr, u32 len) @@ -1348,6 +1315,18 @@ static void brcmf_pcie_down(struct device *dev) { } +static int brcmf_pcie_preinit(struct device *dev) +{ + struct brcmf_bus *bus_if = dev_get_drvdata(dev); + struct brcmf_pciedev *buspub = bus_if->bus_priv.pcie; + + brcmf_dbg(PCIE, "Enter\n"); + + brcmf_pcie_intr_enable(buspub->devinfo); + brcmf_pcie_hostready(buspub->devinfo); + + return 0; +} static int brcmf_pcie_tx(struct device *dev, struct sk_buff *skb) { @@ -1456,6 +1435,7 @@ static int brcmf_pcie_reset(struct device *dev) } static const struct brcmf_bus_ops brcmf_pcie_bus_ops = { + .preinit = brcmf_pcie_preinit, .txdata = brcmf_pcie_tx, .stop = brcmf_pcie_down, .txctl = brcmf_pcie_tx_ctlpkt, @@ -1563,8 +1543,8 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, return err; brcmf_dbg(PCIE, "Download FW %s\n", devinfo->fw_name); - brcmf_pcie_copy_mem_todev(devinfo, devinfo->ci->rambase, - (void *)fw->data, fw->size); + memcpy_toio(devinfo->tcm + devinfo->ci->rambase, + (void *)fw->data, fw->size); resetintr = get_unaligned_le32(fw->data); release_firmware(fw); @@ -1578,7 +1558,7 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, brcmf_dbg(PCIE, "Download NVRAM %s\n", devinfo->nvram_name); address = devinfo->ci->rambase + devinfo->ci->ramsize - nvram_len; - brcmf_pcie_copy_mem_todev(devinfo, address, nvram, nvram_len); + memcpy_toio(devinfo->tcm + address, nvram, nvram_len); brcmf_fw_nvram_free(nvram); } else { brcmf_dbg(PCIE, "No matching NVRAM file found %s\n", @@ -1777,6 +1757,8 @@ static void brcmf_pcie_setup(struct device *dev, int ret, ret = brcmf_chip_get_raminfo(devinfo->ci); if (ret) { brcmf_err(bus, "Failed to get RAM info\n"); + release_firmware(fw); + brcmf_fw_nvram_free(nvram); goto fail; } @@ -1826,9 +1808,6 @@ static void brcmf_pcie_setup(struct device *dev, int ret, init_waitqueue_head(&devinfo->mbdata_resp_wait); - brcmf_pcie_intr_enable(devinfo); - brcmf_pcie_hostready(devinfo); - ret = brcmf_attach(&devinfo->pdev->dev); if (ret) goto fail; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 8effeb7a7269b..5d156e591b35c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -629,7 +629,6 @@ BRCMF_FW_CLM_DEF(43752, "brcmfmac43752-sdio"); /* firmware config files */ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.txt"); -MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txt"); /* per-board firmware binaries */ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.bin"); diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c index 754876cd27ce8..e8bd4f0e3d2dc 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c @@ -299,7 +299,7 @@ static int iwlagn_mac_start(struct ieee80211_hw *hw) priv->is_open = 1; IWL_DEBUG_MAC80211(priv, "leave\n"); - return 0; + return ret; } static void 
iwlagn_mac_stop(struct ieee80211_hw *hw) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 7ad9cee925da5..372cc950cc884 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1561,8 +1561,6 @@ iwl_dump_ini_dbgi_sram_iter(struct iwl_fw_runtime *fwrt, return -EBUSY; range->range_data_size = reg->dev_addr.size; - iwl_write_prph_no_grab(fwrt->trans, DBGI_SRAM_TARGET_ACCESS_CFG, - DBGI_SRAM_TARGET_ACCESS_CFG_RESET_ADDRESS_MSK); for (i = 0; i < (le32_to_cpu(reg->dev_addr.size) / 4); i++) { prph_data = iwl_read_prph_no_grab(fwrt->trans, (i % 2) ? DBGI_SRAM_TARGET_ACCESS_RDATA_MSB : diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index c73672d613562..42f6f8bb83be9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -861,11 +861,18 @@ static void iwl_dbg_tlv_apply_config(struct iwl_fw_runtime *fwrt, case IWL_FW_INI_CONFIG_SET_TYPE_DBGC_DRAM_ADDR: { struct iwl_dbgc1_info dram_info = {}; struct iwl_dram_data *frags = &fwrt->trans->dbg.fw_mon_ini[1].frags[0]; - __le64 dram_base_addr = cpu_to_le64(frags->physical); - __le32 dram_size = cpu_to_le32(frags->size); - u64 dram_addr = le64_to_cpu(dram_base_addr); + __le64 dram_base_addr; + __le32 dram_size; + u64 dram_addr; u32 ret; + if (!frags) + break; + + dram_base_addr = cpu_to_le64(frags->physical); + dram_size = cpu_to_le32(frags->size); + dram_addr = le64_to_cpu(dram_base_addr); + IWL_DEBUG_FW(fwrt, "WRT: dram_base_addr 0x%016llx, dram_size 0x%x\n", dram_base_addr, dram_size); IWL_DEBUG_FW(fwrt, "WRT: config_list->addr_offset: %u\n", diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h index 95b3dae7b504b..9331a6b6bf36c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h @@ -354,8 +354,6 @@ #define WFPM_GP2 0xA030B4 /* DBGI SRAM Register details */ -#define DBGI_SRAM_TARGET_ACCESS_CFG 0x00A2E14C -#define DBGI_SRAM_TARGET_ACCESS_CFG_RESET_ADDRESS_MSK 0x10000 #define DBGI_SRAM_TARGET_ACCESS_RDATA_LSB 0x00A2E154 #define DBGI_SRAM_TARGET_ACCESS_RDATA_MSB 0x00A2E158 diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index b400867e94f0a..3f284836e7076 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -2704,7 +2704,9 @@ static int iwl_mvm_d3_test_open(struct inode *inode, struct file *file) /* start pseudo D3 */ rtnl_lock(); + wiphy_lock(mvm->hw->wiphy); err = __iwl_mvm_suspend(mvm->hw, mvm->hw->wiphy->wowlan_config, true); + wiphy_unlock(mvm->hw->wiphy); rtnl_unlock(); if (err > 0) err = -EINVAL; @@ -2760,7 +2762,9 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) iwl_fw_dbg_read_d3_debug_data(&mvm->fwrt); rtnl_lock(); + wiphy_lock(mvm->hw->wiphy); __iwl_mvm_resume(mvm, true); + wiphy_unlock(mvm->hw->wiphy); rtnl_unlock(); iwl_mvm_resume_tcm(mvm); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index ae589b3b8c46e..ee031a5897140 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1658,8 +1658,10 @@ int iwl_mvm_up(struct iwl_mvm *mvm) while (!sband && i < NUM_NL80211_BANDS) sband = mvm->hw->wiphy->bands[i++]; - if (WARN_ON_ONCE(!sband)) + if 
(WARN_ON_ONCE(!sband)) { + ret = -ENODEV; goto error; + } chan = &sband->channels[0]; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 1f8b97995b943..069d54501e30e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -235,7 +235,8 @@ static void iwl_mvm_rx_thermal_dual_chain_req(struct iwl_mvm *mvm, */ mvm->fw_static_smps_request = req->event == cpu_to_le32(THERMAL_DUAL_CHAIN_REQ_DISABLE); - ieee80211_iterate_interfaces(mvm->hw, IEEE80211_IFACE_ITER_NORMAL, + ieee80211_iterate_interfaces(mvm->hw, + IEEE80211_IFACE_SKIP_SDATA_NOT_IN_DRIVER, iwl_mvm_intf_dual_chain_req, NULL); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 64446a11ef980..9a46468bd4345 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -640,7 +640,7 @@ static void iwl_mvm_stat_iterator_all_macs(void *_data, u8 *mac, struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); u16 vif_id = mvmvif->id; - if (WARN_ONCE(vif_id > MAC_INDEX_AUX, "invalid vif id: %d", vif_id)) + if (WARN_ONCE(vif_id >= MAC_INDEX_AUX, "invalid vif id: %d", vif_id)) return; if (vif->type != NL80211_IFTYPE_STATION) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 9213f8518f10d..40daced97b9e8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -318,15 +318,14 @@ static u32 iwl_mvm_get_tx_rate(struct iwl_mvm *mvm, /* info->control is only relevant for non HW rate control */ if (!ieee80211_hw_check(mvm->hw, HAS_RATE_CONTROL)) { - struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - /* HT rate doesn't make sense for a non data frame */ WARN_ONCE(info->control.rates[0].flags & IEEE80211_TX_RC_MCS && !ieee80211_is_data(fc), "Got a HT rate (flags:0x%x/mcs:%d/fc:0x%x/state:%d) for a non data frame\n", info->control.rates[0].flags, info->control.rates[0].idx, - le16_to_cpu(fc), sta ? mvmsta->sta_state : -1); + le16_to_cpu(fc), + sta ? 
iwl_mvm_sta_from_mac80211(sta)->sta_state : -1); rate_idx = info->control.rates[0].idx; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index ef14584fc0a17..4b08eb46617c7 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1112,7 +1112,7 @@ static const struct iwl_causes_list causes_list_pre_bz[] = { }; static const struct iwl_causes_list causes_list_bz[] = { - {MSIX_HW_INT_CAUSES_REG_SW_ERR_BZ, CSR_MSIX_HW_INT_MASK_AD, 0x29}, + {MSIX_HW_INT_CAUSES_REG_SW_ERR_BZ, CSR_MSIX_HW_INT_MASK_AD, 0x15}, }; static void iwl_pcie_map_list(struct iwl_trans *trans, diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 404c3d1a70d69..1f6f7a44d3f00 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -224,7 +224,7 @@ enum mt76_wcid_flags { MT_WCID_FLAG_HDR_TRANS, }; -#define MT76_N_WCIDS 288 +#define MT76_N_WCIDS 544 /* stored in ieee80211_tx_info::hw_queue */ #define MT_TX_HW_QUEUE_EXT_PHY BIT(3) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 2b546bc05d822..83c5eec5b1633 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -641,6 +641,9 @@ mt7603_sta_rate_tbl_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta_rates *sta_rates = rcu_dereference(sta->rates); int i; + if (!sta_rates) + return; + spin_lock_bh(&dev->mt76.lock); for (i = 0; i < ARRAY_SIZE(msta->rates); i++) { msta->rates[i].idx = sta_rates->rate[i].idx; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index ec25e5a95d442..ba31bb7caaf90 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -253,13 +253,13 @@ static void mt7615_mac_fill_tm_rx(struct mt7615_phy *phy, __le32 *rxv) static int mt7615_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) { struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb; + struct ethhdr *eth_hdr = (struct ethhdr *)(skb->data + hdr_gap); struct mt7615_sta *msta = (struct mt7615_sta *)status->wcid; + __le32 *rxd = (__le32 *)skb->data; struct ieee80211_sta *sta; struct ieee80211_vif *vif; struct ieee80211_hdr hdr; - struct ethhdr eth_hdr; - __le32 *rxd = (__le32 *)skb->data; - __le32 qos_ctrl, ht_ctrl; + u16 frame_control; if (FIELD_GET(MT_RXD1_NORMAL_ADDR_TYPE, le32_to_cpu(rxd[1])) != MT_RXD1_NORMAL_U2M) @@ -275,47 +275,53 @@ static int mt7615_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) vif = container_of((void *)msta->vif, struct ieee80211_vif, drv_priv); /* store the info from RXD and ethhdr to avoid being overridden */ - memcpy(ð_hdr, skb->data + hdr_gap, sizeof(eth_hdr)); - hdr.frame_control = FIELD_GET(MT_RXD4_FRAME_CONTROL, rxd[4]); - hdr.seq_ctrl = FIELD_GET(MT_RXD6_SEQ_CTRL, rxd[6]); - qos_ctrl = FIELD_GET(MT_RXD6_QOS_CTL, rxd[6]); - ht_ctrl = FIELD_GET(MT_RXD7_HT_CONTROL, rxd[7]); - + frame_control = le32_get_bits(rxd[4], MT_RXD4_FRAME_CONTROL); + hdr.frame_control = cpu_to_le16(frame_control); + hdr.seq_ctrl = cpu_to_le16(le32_get_bits(rxd[6], MT_RXD6_SEQ_CTRL)); hdr.duration_id = 0; + ether_addr_copy(hdr.addr1, vif->addr); ether_addr_copy(hdr.addr2, sta->addr); - switch (le16_to_cpu(hdr.frame_control) & - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { 
+ switch (frame_control & (IEEE80211_FCTL_TODS | + IEEE80211_FCTL_FROMDS)) { case 0: ether_addr_copy(hdr.addr3, vif->bss_conf.bssid); break; case IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_source); break; case IEEE80211_FCTL_TODS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); break; case IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); - ether_addr_copy(hdr.addr4, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); + ether_addr_copy(hdr.addr4, eth_hdr->h_source); break; default: break; } skb_pull(skb, hdr_gap + sizeof(struct ethhdr) - 2); - if (eth_hdr.h_proto == htons(ETH_P_AARP) || - eth_hdr.h_proto == htons(ETH_P_IPX)) + if (eth_hdr->h_proto == cpu_to_be16(ETH_P_AARP) || + eth_hdr->h_proto == cpu_to_be16(ETH_P_IPX)) ether_addr_copy(skb_push(skb, ETH_ALEN), bridge_tunnel_header); - else if (eth_hdr.h_proto >= htons(ETH_P_802_3_MIN)) + else if (be16_to_cpu(eth_hdr->h_proto) >= ETH_P_802_3_MIN) ether_addr_copy(skb_push(skb, ETH_ALEN), rfc1042_header); else skb_pull(skb, 2); if (ieee80211_has_order(hdr.frame_control)) - memcpy(skb_push(skb, 2), &ht_ctrl, 2); - if (ieee80211_is_data_qos(hdr.frame_control)) - memcpy(skb_push(skb, 2), &qos_ctrl, 2); + memcpy(skb_push(skb, IEEE80211_HT_CTL_LEN), &rxd[7], + IEEE80211_HT_CTL_LEN); + + if (ieee80211_is_data_qos(hdr.frame_control)) { + __le16 qos_ctrl; + + qos_ctrl = cpu_to_le16(le32_get_bits(rxd[6], MT_RXD6_QOS_CTL)); + memcpy(skb_push(skb, IEEE80211_QOS_CTL_LEN), &qos_ctrl, + IEEE80211_QOS_CTL_LEN); + } + if (ieee80211_has_a4(hdr.frame_control)) memcpy(skb_push(skb, sizeof(hdr)), &hdr, sizeof(hdr)); else @@ -2103,6 +2109,14 @@ void mt7615_pm_power_save_work(struct work_struct *work) test_bit(MT76_HW_SCHED_SCANNING, &dev->mphy.state)) goto out; + if (mutex_is_locked(&dev->mt76.mutex)) + /* if mt76 mutex is held we should not put the device + * to sleep since we are currently accessing device + * register map. We need to wait for the next power_save + * trigger. 
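
(The deferral rule in that comment maps onto a familiar userspace pattern. In the kernel the worker merely tests mutex_is_locked() and bails out; the pthread sketch below uses trylock to the same effect, which is an approximation rather than the driver's exact primitive.)

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t dev_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void power_save_work(void)
    {
        if (pthread_mutex_trylock(&dev_mutex) != 0) {
            /* Register map is in use; rearm and try again later. */
            printf("device busy, wait for next trigger\n");
            return;
        }
        printf("putting device to sleep\n");
        pthread_mutex_unlock(&dev_mutex);
    }

    int main(void)
    {
        pthread_mutex_lock(&dev_mutex);   /* simulate a concurrent register access */
        power_save_work();                /* defers */
        pthread_mutex_unlock(&dev_mutex);
        power_save_work();                /* proceeds */
        return 0;
    }
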
+ */ + goto out; + if (time_is_after_jiffies(dev->pm.last_activity + delta)) { delta = dev->pm.last_activity + delta - jiffies; goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index 82d625a16a62c..ce902b107ce33 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -683,6 +683,9 @@ static void mt7615_sta_rate_tbl_update(struct ieee80211_hw *hw, struct ieee80211_sta_rates *sta_rates = rcu_dereference(sta->rates); int i; + if (!sta_rates) + return; + spin_lock_bh(&dev->mt76.lock); for (i = 0; i < ARRAY_SIZE(msta->rates); i++) { msta->rates[i].idx = sta_rates->rate[i].idx; diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index f79e3d5084f39..5664f119447bc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -310,7 +310,7 @@ mt76_connac_mcu_alloc_wtbl_req(struct mt76_dev *dev, struct mt76_wcid *wcid, } if (sta_hdr) - sta_hdr->len = cpu_to_le16(sizeof(hdr)); + le16_add_cpu(&sta_hdr->len, sizeof(hdr)); return skb_put_data(nskb, &hdr, sizeof(hdr)); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h index 5baf8370b7bd8..93c783a3af7c5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h @@ -996,7 +996,8 @@ enum { MCU_CE_CMD_SET_BSS_CONNECTED = 0x16, MCU_CE_CMD_SET_BSS_ABORT = 0x17, MCU_CE_CMD_CANCEL_HW_SCAN = 0x1b, - MCU_CE_CMD_SET_ROC = 0x1d, + MCU_CE_CMD_SET_ROC = 0x1c, + MCU_CE_CMD_SET_EDCA_PARMS = 0x1d, MCU_CE_CMD_SET_P2P_OPPPS = 0x33, MCU_CE_CMD_SET_RATE_TX_POWER = 0x5d, MCU_CE_CMD_SCHED_SCAN_ENABLE = 0x61, diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index d054cdecd5f70..29517ca08de0c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -399,7 +399,7 @@ static void mt7915_mac_init(struct mt7915_dev *dev) /* enable hardware de-agg */ mt76_set(dev, MT_MDP_DCR0, MT_MDP_DCR0_DAMSDU_EN); - for (i = 0; i < MT7915_WTBL_SIZE; i++) + for (i = 0; i < mt7915_wtbl_size(dev); i++) mt7915_mac_wtbl_update(dev, i, MT_WTBL_UPDATE_ADM_COUNT_CLEAR); for (i = 0; i < 2; i++) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 48f1155022823..db267642924d0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -391,13 +391,13 @@ mt7915_mac_decode_he_radiotap(struct sk_buff *skb, __le32 *rxv, u32 mode) static int mt7915_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) { struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb; + struct ethhdr *eth_hdr = (struct ethhdr *)(skb->data + hdr_gap); struct mt7915_sta *msta = (struct mt7915_sta *)status->wcid; + __le32 *rxd = (__le32 *)skb->data; struct ieee80211_sta *sta; struct ieee80211_vif *vif; struct ieee80211_hdr hdr; - struct ethhdr eth_hdr; - __le32 *rxd = (__le32 *)skb->data; - __le32 qos_ctrl, ht_ctrl; + u16 frame_control; if (FIELD_GET(MT_RXD3_NORMAL_ADDR_TYPE, le32_to_cpu(rxd[3])) != MT_RXD3_NORMAL_U2M) @@ -413,47 +413,52 @@ static int mt7915_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) vif = container_of((void *)msta->vif, struct ieee80211_vif, 
drv_priv); /* store the info from RXD and ethhdr to avoid being overridden */ - memcpy(&eth_hdr, skb->data + hdr_gap, sizeof(eth_hdr)); - hdr.frame_control = FIELD_GET(MT_RXD6_FRAME_CONTROL, rxd[6]); - hdr.seq_ctrl = FIELD_GET(MT_RXD8_SEQ_CTRL, rxd[8]); - qos_ctrl = FIELD_GET(MT_RXD8_QOS_CTL, rxd[8]); - ht_ctrl = FIELD_GET(MT_RXD9_HT_CONTROL, rxd[9]); - + frame_control = le32_get_bits(rxd[6], MT_RXD6_FRAME_CONTROL); + hdr.frame_control = cpu_to_le16(frame_control); + hdr.seq_ctrl = cpu_to_le16(le32_get_bits(rxd[8], MT_RXD8_SEQ_CTRL)); hdr.duration_id = 0; + ether_addr_copy(hdr.addr1, vif->addr); ether_addr_copy(hdr.addr2, sta->addr); - switch (le16_to_cpu(hdr.frame_control) & - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + switch (frame_control & (IEEE80211_FCTL_TODS | + IEEE80211_FCTL_FROMDS)) { case 0: ether_addr_copy(hdr.addr3, vif->bss_conf.bssid); break; case IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_source); break; case IEEE80211_FCTL_TODS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); break; case IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); - ether_addr_copy(hdr.addr4, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); + ether_addr_copy(hdr.addr4, eth_hdr->h_source); break; default: break; } skb_pull(skb, hdr_gap + sizeof(struct ethhdr) - 2); - if (eth_hdr.h_proto == htons(ETH_P_AARP) || - eth_hdr.h_proto == htons(ETH_P_IPX)) + if (eth_hdr->h_proto == cpu_to_be16(ETH_P_AARP) || + eth_hdr->h_proto == cpu_to_be16(ETH_P_IPX)) ether_addr_copy(skb_push(skb, ETH_ALEN), bridge_tunnel_header); - else if (eth_hdr.h_proto >= htons(ETH_P_802_3_MIN)) + else if (be16_to_cpu(eth_hdr->h_proto) >= ETH_P_802_3_MIN) ether_addr_copy(skb_push(skb, ETH_ALEN), rfc1042_header); else skb_pull(skb, 2); if (ieee80211_has_order(hdr.frame_control)) - memcpy(skb_push(skb, 2), &ht_ctrl, 2); - if (ieee80211_is_data_qos(hdr.frame_control)) - memcpy(skb_push(skb, 2), &qos_ctrl, 2); + memcpy(skb_push(skb, IEEE80211_HT_CTL_LEN), &rxd[9], + IEEE80211_HT_CTL_LEN); + if (ieee80211_is_data_qos(hdr.frame_control)) { + __le16 qos_ctrl; + + qos_ctrl = cpu_to_le16(le32_get_bits(rxd[8], MT_RXD8_QOS_CTL)); + memcpy(skb_push(skb, IEEE80211_QOS_CTL_LEN), &qos_ctrl, + IEEE80211_QOS_CTL_LEN); + } + if (ieee80211_has_a4(hdr.frame_control)) memcpy(skb_push(skb, sizeof(hdr)), &hdr, sizeof(hdr)); else @@ -1512,7 +1517,6 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid, break; case MT_PHY_TYPE_HT: case MT_PHY_TYPE_HT_GF: - rate.mcs += (rate.nss - 1) * 8; if (rate.mcs > 31) goto out; @@ -1594,7 +1598,7 @@ static void mt7915_mac_add_txs(struct mt7915_dev *dev, void *data) if (pid < MT_PACKET_ID_FIRST) return; - if (wcidx >= MT7915_WTBL_SIZE) + if (wcidx >= mt7915_wtbl_size(dev)) return; rcu_read_lock(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 0911b6f973b5a..31634d7ed1737 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -211,24 +211,12 @@ mt7915_mcu_get_sta_nss(u16 mcs_map) static void mt7915_mcu_set_sta_he_mcs(struct ieee80211_sta *sta, __le16 *he_mcs, - const u16 *mask) + u16 mcs_map) { struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv; - struct cfg80211_chan_def *chandef = &msta->vif->phy->mt76->chandef; + enum nl80211_band band = msta->vif->phy->mt76->chandef.chan->band; +
const u16 *mask = msta->vif->bitrate_mask.control[band].he_mcs; int nss, max_nss = sta->rx_nss > 3 ? 4 : sta->rx_nss; - u16 mcs_map; - - switch (chandef->width) { - case NL80211_CHAN_WIDTH_80P80: - mcs_map = le16_to_cpu(sta->he_cap.he_mcs_nss_supp.rx_mcs_80p80); - break; - case NL80211_CHAN_WIDTH_160: - mcs_map = le16_to_cpu(sta->he_cap.he_mcs_nss_supp.rx_mcs_160); - break; - default: - mcs_map = le16_to_cpu(sta->he_cap.he_mcs_nss_supp.rx_mcs_80); - break; - } for (nss = 0; nss < max_nss; nss++) { int mcs; @@ -1264,8 +1252,11 @@ mt7915_mcu_wtbl_generic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif, generic = (struct wtbl_generic *)tlv; if (sta) { + if (vif->type == NL80211_IFTYPE_STATION) + generic->partial_aid = cpu_to_le16(vif->bss_conf.aid); + else + generic->partial_aid = cpu_to_le16(sta->aid); memcpy(generic->peer_addr, sta->addr, ETH_ALEN); - generic->partial_aid = cpu_to_le16(sta->aid); generic->muar_idx = mvif->mt76.omac_idx; generic->qos = sta->wme; } else { @@ -1319,12 +1310,15 @@ mt7915_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif, case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_AP: basic->conn_type = cpu_to_le32(CONNECTION_INFRA_STA); + basic->aid = cpu_to_le16(sta->aid); break; case NL80211_IFTYPE_STATION: basic->conn_type = cpu_to_le32(CONNECTION_INFRA_AP); + basic->aid = cpu_to_le16(vif->bss_conf.aid); break; case NL80211_IFTYPE_ADHOC: basic->conn_type = cpu_to_le32(CONNECTION_IBSS_ADHOC); + basic->aid = cpu_to_le16(sta->aid); break; default: WARN_ON(1); @@ -1332,7 +1326,6 @@ mt7915_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif, } memcpy(basic->peer_addr, sta->addr, ETH_ALEN); - basic->aid = cpu_to_le16(sta->aid); basic->qos = sta->wme; } @@ -1340,11 +1333,9 @@ static void mt7915_mcu_sta_he_tlv(struct sk_buff *skb, struct ieee80211_sta *sta, struct ieee80211_vif *vif) { - struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv; struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv; struct ieee80211_he_cap_elem *elem = &sta->he_cap.he_cap_elem; - enum nl80211_band band = msta->vif->phy->mt76->chandef.chan->band; - const u16 *mcs_mask = msta->vif->bitrate_mask.control[band].he_mcs; + struct ieee80211_he_mcs_nss_supp mcs_map; struct sta_rec_he *he; struct tlv *tlv; u32 cap = 0; @@ -1434,22 +1425,23 @@ mt7915_mcu_sta_he_tlv(struct sk_buff *skb, struct ieee80211_sta *sta, he->he_cap = cpu_to_le32(cap); + mcs_map = sta->he_cap.he_mcs_nss_supp; switch (sta->bandwidth) { case IEEE80211_STA_RX_BW_160: if (elem->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) mt7915_mcu_set_sta_he_mcs(sta, &he->max_nss_mcs[CMD_HE_MCS_BW8080], - mcs_mask); + le16_to_cpu(mcs_map.rx_mcs_80p80)); mt7915_mcu_set_sta_he_mcs(sta, &he->max_nss_mcs[CMD_HE_MCS_BW160], - mcs_mask); + le16_to_cpu(mcs_map.rx_mcs_160)); fallthrough; default: mt7915_mcu_set_sta_he_mcs(sta, &he->max_nss_mcs[CMD_HE_MCS_BW80], - mcs_mask); + le16_to_cpu(mcs_map.rx_mcs_80)); break; } @@ -1524,9 +1516,6 @@ mt7915_mcu_sta_muru_tlv(struct sk_buff *skb, struct ieee80211_sta *sta, vif->type != NL80211_IFTYPE_AP) return; - if (!sta->vht_cap.vht_supported) - return; - tlv = mt7915_mcu_add_tlv(skb, STA_REC_MURU, sizeof(*muru)); muru = (struct sta_rec_muru *)tlv; @@ -1534,9 +1523,12 @@ mt7915_mcu_sta_muru_tlv(struct sk_buff *skb, struct ieee80211_sta *sta, muru->cfg.mimo_dl_en = mvif->cap.he_mu_ebfer || mvif->cap.vht_mu_ebfer || mvif->cap.vht_mu_ebfee; + muru->cfg.mimo_ul_en = true; + muru->cfg.ofdma_dl_en = true; - muru->mimo_dl.vht_mu_bfee = - 
!!(sta->vht_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE); + if (sta->vht_cap.vht_supported) + muru->mimo_dl.vht_mu_bfee = + !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE); if (!sta->he_cap.has_he) return; @@ -1544,13 +1536,11 @@ mt7915_mcu_sta_muru_tlv(struct sk_buff *skb, struct ieee80211_sta *sta, muru->mimo_dl.partial_bw_dl_mimo = HE_PHY(CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO, elem->phy_cap_info[6]); - muru->cfg.mimo_ul_en = true; muru->mimo_ul.full_ul_mimo = HE_PHY(CAP2_UL_MU_FULL_MU_MIMO, elem->phy_cap_info[2]); muru->mimo_ul.partial_ul_mimo = HE_PHY(CAP2_UL_MU_PARTIAL_MU_MIMO, elem->phy_cap_info[2]); - muru->cfg.ofdma_dl_en = true; muru->ofdma_dl.punc_pream_rx = HE_PHY(CAP1_PREAMBLE_PUNC_RX_MASK, elem->phy_cap_info[1]); muru->ofdma_dl.he_20m_in_40m_2g = @@ -2134,9 +2124,12 @@ mt7915_mcu_add_rate_ctrl_fixed(struct mt7915_dev *dev, phy.sgi |= gi << (i << (_he)); \ phy.he_ltf |= mask->control[band].he_ltf << (i << (_he));\ } \ - for (i = 0; i < ARRAY_SIZE(mask->control[band]._mcs); i++) \ - nrates += hweight16(mask->control[band]._mcs[i]); \ - phy.mcs = ffs(mask->control[band]._mcs[0]) - 1; \ + for (i = 0; i < ARRAY_SIZE(mask->control[band]._mcs); i++) { \ + if (!mask->control[band]._mcs[i]) \ + continue; \ + nrates += hweight16(mask->control[band]._mcs[i]); \ + phy.mcs = ffs(mask->control[band]._mcs[i]) - 1; \ + } \ } while (0) if (sta->he_cap.has_he) { @@ -2394,8 +2387,10 @@ int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif, } ret = mt7915_mcu_sta_wtbl_tlv(dev, skb, vif, sta); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } if (sta && sta->ht_cap.ht_supported) { /* starec amsdu */ @@ -2409,8 +2404,10 @@ int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif, } ret = mt7915_mcu_add_group(dev, vif, sta); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } out: return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_EXT_CMD(STA_REC_UPDATE), true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h index 42d887383e8d8..12ca545664614 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h @@ -12,7 +12,8 @@ #define MT7915_MAX_INTERFACES 19 #define MT7915_MAX_WMM_SETS 4 #define MT7915_WTBL_SIZE 288 -#define MT7915_WTBL_RESERVED (MT7915_WTBL_SIZE - 1) +#define MT7916_WTBL_SIZE 544 +#define MT7915_WTBL_RESERVED (mt7915_wtbl_size(dev) - 1) #define MT7915_WTBL_STA (MT7915_WTBL_RESERVED - \ MT7915_MAX_INTERFACES) @@ -449,6 +450,11 @@ static inline bool is_mt7915(struct mt76_dev *dev) return mt76_chip(dev) == 0x7915; } +static inline u16 mt7915_wtbl_size(struct mt7915_dev *dev) +{ + return is_mt7915(&dev->mt76) ? 
MT7915_WTBL_SIZE : MT7916_WTBL_SIZE; +} + void mt7915_dual_hif_set_irq_mask(struct mt7915_dev *dev, bool write_reg, u32 clear, u32 set); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c index 86fd7292b229f..196b50e616fe0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c @@ -129,23 +129,22 @@ mt7921_queues_acq(struct seq_file *s, void *data) mt7921_mutex_acquire(dev); - for (i = 0; i < 16; i++) { - int j, acs = i / 4, index = i % 4; + for (i = 0; i < 4; i++) { u32 ctrl, val, qlen = 0; + int j; - val = mt76_rr(dev, MT_PLE_AC_QEMPTY(acs, index)); - ctrl = BIT(31) | BIT(15) | (acs << 8); + val = mt76_rr(dev, MT_PLE_AC_QEMPTY(i)); + ctrl = BIT(31) | BIT(11) | (i << 24); for (j = 0; j < 32; j++) { if (val & BIT(j)) continue; - mt76_wr(dev, MT_PLE_FL_Q0_CTRL, - ctrl | (j + (index << 5))); + mt76_wr(dev, MT_PLE_FL_Q0_CTRL, ctrl | j); qlen += mt76_get_field(dev, MT_PLE_FL_Q3_CTRL, GENMASK(11, 0)); } - seq_printf(s, "AC%d%d: queued=%d\n", acs, index, qlen); + seq_printf(s, "AC%d: queued=%d\n", i, qlen); } mt7921_mutex_release(dev); @@ -291,13 +290,12 @@ mt7921_pm_set(void *data, u64 val) pm->enable = false; mt76_connac_pm_wake(&dev->mphy, pm); + pm->enable = val; ieee80211_iterate_active_interfaces(mt76_hw(dev), IEEE80211_IFACE_ITER_RESUME_ALL, mt7921_pm_interface_iter, dev); mt76_connac_mcu_set_deep_sleep(&dev->mt76, pm->ds_enable); - - pm->enable = val; mt76_connac_power_save_sched(&dev->mphy, pm); out: mutex_unlock(&dev->mt76.mutex); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c index cdff1fd52d93a..39d6ce4ecddd7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c @@ -78,110 +78,6 @@ static void mt7921_dma_prefetch(struct mt7921_dev *dev) mt76_wr(dev, MT_WFDMA0_TX_RING17_EXT_CTRL, PREFETCH(0x380, 0x4)); } -static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr) -{ - static const struct { - u32 phys; - u32 mapped; - u32 size; - } fixed_map[] = { - { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ - { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ - { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ - { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ - { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ - { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ - { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ - { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ - { 0x00400000, 0x80000, 0x10000 }, /* WF_MCU_SYSRAM */ - { 0x00410000, 0x90000, 0x10000 }, /* WF_MCU_SYSRAM (configure register) */ - { 0x40000000, 0x70000, 0x10000 }, /* WF_UMAC_SYSRAM */ - { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */ - { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */ - { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */ - { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */ - { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */ - { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */ - { 0x7c060000, 0xe0000, 0x10000 }, /* CONN_INFRA, conn_host_csr_top */ - { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */ - { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */ - { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */ - { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */ - { 
0x820cc000, 0x0e000, 0x1000 }, /* WF_UMAC_TOP (PP) */ - { 0x820cd000, 0x0f000, 0x1000 }, /* WF_MDP_TOP */ - { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */ - { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */ - { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */ - { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */ - { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */ - { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */ - { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */ - { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */ - { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */ - { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */ - { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */ - { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */ - { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */ - { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */ - { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */ - { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */ - { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */ - { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */ - { 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */ - }; - int i; - - if (addr < 0x100000) - return addr; - - for (i = 0; i < ARRAY_SIZE(fixed_map); i++) { - u32 ofs; - - if (addr < fixed_map[i].phys) - continue; - - ofs = addr - fixed_map[i].phys; - if (ofs > fixed_map[i].size) - continue; - - return fixed_map[i].mapped + ofs; - } - - if ((addr >= 0x18000000 && addr < 0x18c00000) || - (addr >= 0x70000000 && addr < 0x78000000) || - (addr >= 0x7c000000 && addr < 0x7c400000)) - return mt7921_reg_map_l1(dev, addr); - - dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n", - addr); - - return 0; -} - -static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - return dev->bus_ops->rr(mdev, addr); -} - -static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - dev->bus_ops->wr(mdev, addr, val); -} - -static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val) -{ - struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); - u32 addr = __mt7921_reg_addr(dev, offset); - - return dev->bus_ops->rmw(mdev, addr, mask, val); -} - static int mt7921_dma_disable(struct mt7921_dev *dev, bool force) { if (force) { @@ -341,23 +237,8 @@ int mt7921_wpdma_reinit_cond(struct mt7921_dev *dev) int mt7921_dma_init(struct mt7921_dev *dev) { - struct mt76_bus_ops *bus_ops; int ret; - dev->phy.dev = dev; - dev->phy.mt76 = &dev->mt76.phy; - dev->mt76.phy.priv = &dev->phy; - dev->bus_ops = dev->mt76.bus; - bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), - GFP_KERNEL); - if (!bus_ops) - return -ENOMEM; - - bus_ops->rr = mt7921_rr; - bus_ops->wr = mt7921_wr; - bus_ops->rmw = mt7921_rmw; - dev->mt76.bus = bus_ops; - mt76_dma_attach(&dev->mt76); ret = mt7921_dma_disable(dev, true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c index ec10f95a46495..84f72dd1bf930 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ 
b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -402,13 +402,13 @@ mt7921_mac_assoc_rssi(struct mt7921_dev *dev, struct sk_buff *skb) static int mt7921_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) { struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb; + struct ethhdr *eth_hdr = (struct ethhdr *)(skb->data + hdr_gap); struct mt7921_sta *msta = (struct mt7921_sta *)status->wcid; + __le32 *rxd = (__le32 *)skb->data; struct ieee80211_sta *sta; struct ieee80211_vif *vif; struct ieee80211_hdr hdr; - struct ethhdr eth_hdr; - __le32 *rxd = (__le32 *)skb->data; - __le32 qos_ctrl, ht_ctrl; + u16 frame_control; if (FIELD_GET(MT_RXD3_NORMAL_ADDR_TYPE, le32_to_cpu(rxd[3])) != MT_RXD3_NORMAL_U2M) @@ -424,47 +424,52 @@ static int mt7921_reverse_frag0_hdr_trans(struct sk_buff *skb, u16 hdr_gap) vif = container_of((void *)msta->vif, struct ieee80211_vif, drv_priv); /* store the info from RXD and ethhdr to avoid being overridden */ - memcpy(&eth_hdr, skb->data + hdr_gap, sizeof(eth_hdr)); - hdr.frame_control = FIELD_GET(MT_RXD6_FRAME_CONTROL, rxd[6]); - hdr.seq_ctrl = FIELD_GET(MT_RXD8_SEQ_CTRL, rxd[8]); - qos_ctrl = FIELD_GET(MT_RXD8_QOS_CTL, rxd[8]); - ht_ctrl = FIELD_GET(MT_RXD9_HT_CONTROL, rxd[9]); - + frame_control = le32_get_bits(rxd[6], MT_RXD6_FRAME_CONTROL); + hdr.frame_control = cpu_to_le16(frame_control); + hdr.seq_ctrl = cpu_to_le16(le32_get_bits(rxd[8], MT_RXD8_SEQ_CTRL)); hdr.duration_id = 0; + ether_addr_copy(hdr.addr1, vif->addr); ether_addr_copy(hdr.addr2, sta->addr); - switch (le16_to_cpu(hdr.frame_control) & - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + switch (frame_control & (IEEE80211_FCTL_TODS | + IEEE80211_FCTL_FROMDS)) { case 0: ether_addr_copy(hdr.addr3, vif->bss_conf.bssid); break; case IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_source); break; case IEEE80211_FCTL_TODS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); break; case IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS: - ether_addr_copy(hdr.addr3, eth_hdr.h_dest); - ether_addr_copy(hdr.addr4, eth_hdr.h_source); + ether_addr_copy(hdr.addr3, eth_hdr->h_dest); + ether_addr_copy(hdr.addr4, eth_hdr->h_source); break; default: break; } skb_pull(skb, hdr_gap + sizeof(struct ethhdr) - 2); - if (eth_hdr.h_proto == htons(ETH_P_AARP) || - eth_hdr.h_proto == htons(ETH_P_IPX)) + if (eth_hdr->h_proto == cpu_to_be16(ETH_P_AARP) || + eth_hdr->h_proto == cpu_to_be16(ETH_P_IPX)) ether_addr_copy(skb_push(skb, ETH_ALEN), bridge_tunnel_header); - else if (eth_hdr.h_proto >= htons(ETH_P_802_3_MIN)) + else if (be16_to_cpu(eth_hdr->h_proto) >= ETH_P_802_3_MIN) ether_addr_copy(skb_push(skb, ETH_ALEN), rfc1042_header); else skb_pull(skb, 2); if (ieee80211_has_order(hdr.frame_control)) - memcpy(skb_push(skb, 2), &ht_ctrl, 2); - if (ieee80211_is_data_qos(hdr.frame_control)) - memcpy(skb_push(skb, 2), &qos_ctrl, 2); + memcpy(skb_push(skb, IEEE80211_HT_CTL_LEN), &rxd[9], + IEEE80211_HT_CTL_LEN); + if (ieee80211_is_data_qos(hdr.frame_control)) { + __le16 qos_ctrl; + + qos_ctrl = cpu_to_le16(le32_get_bits(rxd[8], MT_RXD8_QOS_CTL)); + memcpy(skb_push(skb, IEEE80211_QOS_CTL_LEN), &qos_ctrl, + IEEE80211_QOS_CTL_LEN); + } + if (ieee80211_has_a4(hdr.frame_control)) memcpy(skb_push(skb, sizeof(hdr)), &hdr, sizeof(hdr)); else @@ -914,9 +919,15 @@ mt7921_mac_write_txwi_80211(struct mt7921_dev *dev, __le32 *txwi, txwi[3] |= cpu_to_le32(val); } - val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | - FIELD_PREP(MT_TXD7_SUB_TYPE, 
fc_stype); - txwi[7] |= cpu_to_le32(val); + if (mt76_is_mmio(&dev->mt76)) { + val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | + FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype); + txwi[7] |= cpu_to_le32(val); + } else { + val = FIELD_PREP(MT_TXD8_L_TYPE, fc_type) | + FIELD_PREP(MT_TXD8_L_SUB_TYPE, fc_stype); + txwi[8] |= cpu_to_le32(val); + } } void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi, @@ -1092,7 +1103,6 @@ mt7921_mac_add_txs_skb(struct mt7921_dev *dev, struct mt76_wcid *wcid, int pid, break; case MT_PHY_TYPE_HT: case MT_PHY_TYPE_HT_GF: - rate.mcs += (rate.nss - 1) * 8; if (rate.mcs > 31) goto out; @@ -1551,6 +1561,14 @@ void mt7921_pm_power_save_work(struct work_struct *work) test_bit(MT76_HW_SCHED_SCANNING, &mphy->state)) goto out; + if (mutex_is_locked(&dev->mt76.mutex)) + /* if mt76 mutex is held we should not put the device + * to sleep since we are currently accessing device + * register map. We need to wait for the next power_save + * trigger. + */ + goto out; + if (time_is_after_jiffies(dev->pm.last_activity + delta)) { delta = dev->pm.last_activity + delta - jiffies; goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h index 544a1c33126a4..12e1cf8abe6ea 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h @@ -284,6 +284,9 @@ enum tx_mcu_port_q_idx { #define MT_TXD7_HW_AMSDU BIT(10) #define MT_TXD7_TX_TIME GENMASK(9, 0) +#define MT_TXD8_L_TYPE GENMASK(5, 4) +#define MT_TXD8_L_SUB_TYPE GENMASK(3, 0) + #define MT_TX_RATE_STBC BIT(13) #define MT_TX_RATE_NSS GENMASK(12, 10) #define MT_TX_RATE_MODE GENMASK(9, 6) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index ef1e1ef91611b..e82545a7fcc11 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -707,12 +707,8 @@ static int mt7921_load_patch(struct mt7921_dev *dev) if (mt76_is_sdio(&dev->mt76)) { /* activate again */ ret = __mt7921_mcu_fw_pmctrl(dev); - if (ret) - return ret; - - ret = __mt7921_mcu_drv_pmctrl(dev); - if (ret) - return ret; + if (!ret) + ret = __mt7921_mcu_drv_pmctrl(dev); } out: @@ -920,33 +916,28 @@ EXPORT_SYMBOL_GPL(mt7921_mcu_exit); int mt7921_mcu_set_tx(struct mt7921_dev *dev, struct ieee80211_vif *vif) { -#define WMM_AIFS_SET BIT(0) -#define WMM_CW_MIN_SET BIT(1) -#define WMM_CW_MAX_SET BIT(2) -#define WMM_TXOP_SET BIT(3) -#define WMM_PARAM_SET GENMASK(3, 0) -#define TX_CMD_MODE 1 + struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv; + struct edca { - u8 queue; - u8 set; - u8 aifs; - u8 cw_min; + __le16 cw_min; __le16 cw_max; __le16 txop; - }; + __le16 aifs; + u8 guardtime; + u8 acm; + } __packed; struct mt7921_mcu_tx { - u8 total; - u8 action; - u8 valid; - u8 mode; - struct edca edca[IEEE80211_NUM_ACS]; + u8 bss_idx; + u8 qos; + u8 wmm_idx; + u8 pad; } __packed req = { - .valid = true, - .mode = TX_CMD_MODE, - .total = IEEE80211_NUM_ACS, + .bss_idx = mvif->mt76.idx, + .qos = vif->bss_conf.qos, + .wmm_idx = mvif->mt76.wmm_idx, }; - struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv; + struct mu_edca { u8 cw_min; u8 cw_max; @@ -970,30 +961,29 @@ int mt7921_mcu_set_tx(struct mt7921_dev *dev, struct ieee80211_vif *vif) .qos = vif->bss_conf.qos, .wmm_idx = mvif->mt76.wmm_idx, }; + int to_aci[] = {1, 0, 2, 3}; int ac, ret; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { struct ieee80211_tx_queue_params *q = &mvif->queue_params[ac]; - struct 
edca *e = &req.edca[ac]; + struct edca *e = &req.edca[to_aci[ac]]; - e->set = WMM_PARAM_SET; - e->queue = ac + mvif->mt76.wmm_idx * MT7921_MAX_WMM_SETS; e->aifs = q->aifs; e->txop = cpu_to_le16(q->txop); if (q->cw_min) - e->cw_min = fls(q->cw_min); + e->cw_min = cpu_to_le16(q->cw_min); else e->cw_min = 5; if (q->cw_max) - e->cw_max = cpu_to_le16(fls(q->cw_max)); + e->cw_max = cpu_to_le16(q->cw_max); else e->cw_max = cpu_to_le16(10); } - ret = mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(EDCA_UPDATE), - &req, sizeof(req), true); + ret = mt76_mcu_send_msg(&dev->mt76, MCU_CE_CMD(SET_EDCA_PARMS), &req, + sizeof(req), false); if (ret) return ret; @@ -1003,7 +993,6 @@ int mt7921_mcu_set_tx(struct mt7921_dev *dev, struct ieee80211_vif *vif) for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { struct ieee80211_he_mu_edca_param_ac_rec *q; struct mu_edca *e; - int to_aci[] = {1, 0, 2, 3}; if (!mvif->queue_params[ac].mu_edca) break; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h index 96647801850a5..33f8e5b541b35 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h @@ -452,6 +452,7 @@ int mt7921e_mcu_init(struct mt7921_dev *dev); int mt7921s_wfsys_reset(struct mt7921_dev *dev); int mt7921s_mac_reset(struct mt7921_dev *dev); int mt7921s_init_reset(struct mt7921_dev *dev); +int __mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev); int mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev); int mt7921e_mcu_fw_pmctrl(struct mt7921_dev *dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 9dae2f5972bf9..9a71a5d864819 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -121,6 +121,110 @@ static void mt7921e_unregister_device(struct mt7921_dev *dev) mt76_free_device(&dev->mt76); } +static u32 __mt7921_reg_addr(struct mt7921_dev *dev, u32 addr) +{ + static const struct { + u32 phys; + u32 mapped; + u32 size; + } fixed_map[] = { + { 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */ + { 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */ + { 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */ + { 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */ + { 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */ + { 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */ + { 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */ + { 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */ + { 0x00400000, 0x80000, 0x10000 }, /* WF_MCU_SYSRAM */ + { 0x00410000, 0x90000, 0x10000 }, /* WF_MCU_SYSRAM (configure register) */ + { 0x40000000, 0x70000, 0x10000 }, /* WF_UMAC_SYSRAM */ + { 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */ + { 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */ + { 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */ + { 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */ + { 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */ + { 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */ + { 0x7c060000, 0xe0000, 0x10000 }, /* CONN_INFRA, conn_host_csr_top */ + { 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */ + { 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */ + { 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */ + { 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */ + { 0x820cc000, 0x0e000, 0x1000 }, /* WF_UMAC_TOP (PP) */ + { 
0x820cd000, 0x0f000, 0x1000 }, /* WF_MDP_TOP */ + { 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */ + { 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */ + { 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */ + { 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */ + { 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */ + { 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */ + { 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */ + { 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */ + { 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */ + { 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */ + { 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */ + { 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */ + { 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */ + { 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */ + { 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */ + { 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */ + { 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */ + { 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */ + { 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */ + }; + int i; + + if (addr < 0x100000) + return addr; + + for (i = 0; i < ARRAY_SIZE(fixed_map); i++) { + u32 ofs; + + if (addr < fixed_map[i].phys) + continue; + + ofs = addr - fixed_map[i].phys; + if (ofs > fixed_map[i].size) + continue; + + return fixed_map[i].mapped + ofs; + } + + if ((addr >= 0x18000000 && addr < 0x18c00000) || + (addr >= 0x70000000 && addr < 0x78000000) || + (addr >= 0x7c000000 && addr < 0x7c400000)) + return mt7921_reg_map_l1(dev, addr); + + dev_err(dev->mt76.dev, "Access currently unsupported address %08x\n", + addr); + + return 0; +} + +static u32 mt7921_rr(struct mt76_dev *mdev, u32 offset) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + return dev->bus_ops->rr(mdev, addr); +} + +static void mt7921_wr(struct mt76_dev *mdev, u32 offset, u32 val) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + dev->bus_ops->wr(mdev, addr, val); +} + +static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val) +{ + struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); + u32 addr = __mt7921_reg_addr(dev, offset); + + return dev->bus_ops->rmw(mdev, addr, mask, val); +} + static int mt7921_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -151,6 +255,7 @@ static int mt7921_pci_probe(struct pci_dev *pdev, .fw_own = mt7921e_mcu_fw_pmctrl, }; + struct mt76_bus_ops *bus_ops; struct mt7921_dev *dev; struct mt76_dev *mdev; int ret; @@ -188,6 +293,25 @@ static int mt7921_pci_probe(struct pci_dev *pdev, mt76_mmio_init(&dev->mt76, pcim_iomap_table(pdev)[0]); tasklet_init(&dev->irq_tasklet, mt7921_irq_tasklet, (unsigned long)dev); + + dev->phy.dev = dev; + dev->phy.mt76 = &dev->mt76.phy; + dev->mt76.phy.priv = &dev->phy; + dev->bus_ops = dev->mt76.bus; + bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops), + GFP_KERNEL); + if (!bus_ops) + return -ENOMEM; + + bus_ops->rr = mt7921_rr; + bus_ops->wr = mt7921_wr; + bus_ops->rmw = mt7921_rmw; + dev->mt76.bus = bus_ops; + + ret = __mt7921e_mcu_drv_pmctrl(dev); + if (ret) + return ret; + mdev->rev = (mt7921_l1_rr(dev, MT_HW_CHIPID) << 16) | 
(mt7921_l1_rr(dev, MT_HW_REV) & 0xff); dev_info(mdev->dev, "ASIC revision: %04x\n", mdev->rev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c index a020352122a12..daa73c92426ca 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c @@ -59,10 +59,8 @@ int mt7921e_mcu_init(struct mt7921_dev *dev) return err; } -int mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev) +int __mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev) { - struct mt76_phy *mphy = &dev->mt76.phy; - struct mt76_connac_pm *pm = &dev->pm; int i, err = 0; for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) { @@ -75,9 +73,21 @@ int mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev) if (i == MT7921_DRV_OWN_RETRY_COUNT) { dev_err(dev->mt76.dev, "driver own failed\n"); err = -EIO; - goto out; } + return err; +} + +int mt7921e_mcu_drv_pmctrl(struct mt7921_dev *dev) +{ + struct mt76_phy *mphy = &dev->mt76.phy; + struct mt76_connac_pm *pm = &dev->pm; + int err; + + err = __mt7921e_mcu_drv_pmctrl(dev); + if (err < 0) + goto out; + mt7921_wpdma_reinit_cond(dev); clear_bit(MT76_STATE_PM, &mphy->state); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h index cbd38122c510f..c8c92faa4624f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h @@ -17,13 +17,12 @@ #define MT_PLE_BASE 0x820c0000 #define MT_PLE(ofs) (MT_PLE_BASE + (ofs)) -#define MT_PLE_FL_Q0_CTRL MT_PLE(0x1b0) -#define MT_PLE_FL_Q1_CTRL MT_PLE(0x1b4) -#define MT_PLE_FL_Q2_CTRL MT_PLE(0x1b8) -#define MT_PLE_FL_Q3_CTRL MT_PLE(0x1bc) +#define MT_PLE_FL_Q0_CTRL MT_PLE(0x3e0) +#define MT_PLE_FL_Q1_CTRL MT_PLE(0x3e4) +#define MT_PLE_FL_Q2_CTRL MT_PLE(0x3e8) +#define MT_PLE_FL_Q3_CTRL MT_PLE(0x3ec) -#define MT_PLE_AC_QEMPTY(ac, n) MT_PLE(0x300 + 0x10 * (ac) + \ - ((n) << 2)) +#define MT_PLE_AC_QEMPTY(_n) MT_PLE(0x500 + 0x40 * (_n)) #define MT_PLE_AMSDU_PACK_MSDU_CNT(n) MT_PLE(0x10e0 + ((n) << 2)) #define MT_MDP_BASE 0x820cd000 diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c index d20f2ff01be17..5d8af18c70267 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c @@ -49,6 +49,26 @@ mt7921s_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb, return ret; } +static u32 mt7921s_read_rm3r(struct mt7921_dev *dev) +{ + struct mt76_sdio *sdio = &dev->mt76.sdio; + + return sdio_readl(sdio->func, MCR_D2HRM3R, NULL); +} + +static u32 mt7921s_clear_rm3r_drv_own(struct mt7921_dev *dev) +{ + struct mt76_sdio *sdio = &dev->mt76.sdio; + u32 val; + + val = sdio_readl(sdio->func, MCR_D2HRM3R, NULL); + if (val) + sdio_writel(sdio->func, H2D_SW_INT_CLEAR_MAILBOX_ACK, + MCR_WSICR, NULL); + + return val; +} + int mt7921s_mcu_init(struct mt7921_dev *dev) { static const struct mt76_mcu_ops mt7921s_mcu_ops = { @@ -88,6 +108,12 @@ int mt7921s_mcu_drv_pmctrl(struct mt7921_dev *dev) err = readx_poll_timeout(mt76s_read_pcr, &dev->mt76, status, status & WHLPCR_IS_DRIVER_OWN, 2000, 1000000); + + if (!err && test_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state)) + err = readx_poll_timeout(mt7921s_read_rm3r, dev, status, + status & D2HRM3R_IS_DRIVER_OWN, + 2000, 1000000); + sdio_release_host(func); if (err < 0) { @@ -115,12 +141,24 @@ int mt7921s_mcu_fw_pmctrl(struct mt7921_dev *dev) sdio_claim_host(func); + if 
(test_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state)) { + err = readx_poll_timeout(mt7921s_clear_rm3r_drv_own, + dev, status, + !(status & D2HRM3R_IS_DRIVER_OWN), + 2000, 1000000); + if (err < 0) { + dev_err(dev->mt76.dev, "mailbox ACK not cleared\n"); + goto err; + } + } + sdio_writel(func, WHLPCR_FW_OWN_REQ_SET, MCR_WHLPCR, NULL); err = readx_poll_timeout(mt76s_read_pcr, &dev->mt76, status, !(status & WHLPCR_IS_DRIVER_OWN), 2000, 1000000); sdio_release_host(func); +err: if (err < 0) { dev_err(dev->mt76.dev, "firmware own failed\n"); clear_bit(MT76_STATE_PM, &mphy->state); diff --git a/drivers/net/wireless/mediatek/mt76/sdio.h b/drivers/net/wireless/mediatek/mt76/sdio.h index 99db4ad93b7c7..27d5d2077ebae 100644 --- a/drivers/net/wireless/mediatek/mt76/sdio.h +++ b/drivers/net/wireless/mediatek/mt76/sdio.h @@ -65,6 +65,7 @@ #define MCR_H2DSM0R 0x0070 #define H2D_SW_INT_READ BIT(16) #define H2D_SW_INT_WRITE BIT(17) +#define H2D_SW_INT_CLEAR_MAILBOX_ACK BIT(22) #define MCR_H2DSM1R 0x0074 #define MCR_D2HRM0R 0x0078 @@ -109,6 +110,7 @@ #define MCR_H2DSM2R 0x0160 /* supported in CONNAC2 */ #define MCR_H2DSM3R 0x0164 /* supported in CONNAC2 */ #define MCR_D2HRM3R 0x0174 /* supported in CONNAC2 */ +#define D2HRM3R_IS_DRIVER_OWN BIT(0) #define MCR_WTQCR8 0x0190 /* supported in CONNAC2 */ #define MCR_WTQCR9 0x0194 /* supported in CONNAC2 */ #define MCR_WTQCR10 0x0198 /* supported in CONNAC2 */ diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 2987ad9271f64..87e98ab068ed7 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -382,6 +382,8 @@ static int ray_config(struct pcmcia_device *link) goto failed; local->sram = ioremap(link->resource[2]->start, resource_size(link->resource[2])); + if (!local->sram) + goto failed; /*** Set up 16k window for shared memory (receive buffer) ***************/ link->resource[3]->flags |= @@ -396,6 +398,8 @@ static int ray_config(struct pcmcia_device *link) goto failed; local->rmem = ioremap(link->resource[3]->start, resource_size(link->resource[3])); + if (!local->rmem) + goto failed; /*** Set up window for attribute memory ***********************************/ link->resource[4]->flags |= @@ -410,6 +414,8 @@ static int ray_config(struct pcmcia_device *link) goto failed; local->amem = ioremap(link->resource[4]->start, resource_size(link->resource[4])); + if (!local->amem) + goto failed; dev_dbg(&link->dev, "ray_config sram=%p\n", local->sram); dev_dbg(&link->dev, "ray_config rmem=%p\n", local->rmem); diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c index 2f7c036f90221..4c8e5ea5d069c 100644 --- a/drivers/net/wireless/realtek/rtw88/fw.c +++ b/drivers/net/wireless/realtek/rtw88/fw.c @@ -1784,9 +1784,9 @@ void rtw_fw_scan_notify(struct rtw_dev *rtwdev, bool start) rtw_fw_send_h2c_command(rtwdev, h2c_pkt); } -static void rtw_append_probe_req_ie(struct rtw_dev *rtwdev, struct sk_buff *skb, - struct sk_buff_head *list, - struct rtw_vif *rtwvif) +static int rtw_append_probe_req_ie(struct rtw_dev *rtwdev, struct sk_buff *skb, + struct sk_buff_head *list, u8 *bands, + struct rtw_vif *rtwvif) { struct ieee80211_scan_ies *ies = rtwvif->scan_ies; struct rtw_chip_info *chip = rtwdev->chip; @@ -1797,19 +1797,24 @@ static void rtw_append_probe_req_ie(struct rtw_dev *rtwdev, struct sk_buff *skb, if (!(BIT(idx) & chip->band)) continue; new = skb_copy(skb, GFP_KERNEL); + if (!new) + return -ENOMEM; skb_put_data(new, ies->ies[idx], ies->len[idx]); skb_put_data(new, ies->common_ies, 
ies->common_ie_len); skb_queue_tail(list, new); + (*bands)++; } + + return 0; } -static int _rtw_hw_scan_update_probe_req(struct rtw_dev *rtwdev, u8 num_ssids, +static int _rtw_hw_scan_update_probe_req(struct rtw_dev *rtwdev, u8 num_probes, struct sk_buff_head *probe_req_list) { struct rtw_chip_info *chip = rtwdev->chip; struct sk_buff *skb, *tmp; u8 page_offset = 1, *buf, page_size = chip->page_size; - u8 pages = page_offset + num_ssids * RTW_PROBE_PG_CNT; + u8 pages = page_offset + num_probes * RTW_PROBE_PG_CNT; u16 pg_addr = rtwdev->fifo.rsvd_h2c_info_addr, loc; u16 buf_offset = page_size * page_offset; u8 tx_desc_sz = chip->tx_pkt_desc_sz; @@ -1848,6 +1853,8 @@ static int _rtw_hw_scan_update_probe_req(struct rtw_dev *rtwdev, u8 num_ssids, rtwdev->scan_info.probe_pg_size = page_offset; out: kfree(buf); + skb_queue_walk_safe(probe_req_list, skb, tmp) + kfree_skb(skb); return ret; } @@ -1857,8 +1864,9 @@ static int rtw_hw_scan_update_probe_req(struct rtw_dev *rtwdev, { struct cfg80211_scan_request *req = rtwvif->scan_req; struct sk_buff_head list; - struct sk_buff *skb; - u8 num = req->n_ssids, i; + struct sk_buff *skb, *tmp; + u8 num = req->n_ssids, i, bands = 0; + int ret; skb_queue_head_init(&list); for (i = 0; i < num; i++) { @@ -1866,11 +1874,25 @@ static int rtw_hw_scan_update_probe_req(struct rtw_dev *rtwdev, req->ssids[i].ssid, req->ssids[i].ssid_len, req->ie_len); - rtw_append_probe_req_ie(rtwdev, skb, &list, rtwvif); + if (!skb) { + ret = -ENOMEM; + goto out; + } + ret = rtw_append_probe_req_ie(rtwdev, skb, &list, &bands, + rtwvif); + if (ret) + goto out; + kfree_skb(skb); } - return _rtw_hw_scan_update_probe_req(rtwdev, num, &list); + return _rtw_hw_scan_update_probe_req(rtwdev, num * bands, &list); + +out: + skb_queue_walk_safe(&list, skb, tmp) + kfree_skb(skb); + + return ret; } static int rtw_add_chan_info(struct rtw_dev *rtwdev, struct rtw_chan_info *info, @@ -2022,7 +2044,7 @@ void rtw_hw_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif, rtwdev->hal.rcr |= BIT_CBSSID_BCN; rtw_write32(rtwdev, REG_RCR, rtwdev->hal.rcr); - rtw_core_scan_complete(rtwdev, vif); + rtw_core_scan_complete(rtwdev, vif, true); ieee80211_wake_queues(rtwdev->hw); ieee80211_scan_completed(rtwdev->hw, &info); diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c index ae7d97de5fdf4..647d2662955ba 100644 --- a/drivers/net/wireless/realtek/rtw88/mac80211.c +++ b/drivers/net/wireless/realtek/rtw88/mac80211.c @@ -72,6 +72,9 @@ static int rtw_ops_config(struct ieee80211_hw *hw, u32 changed) struct rtw_dev *rtwdev = hw->priv; int ret = 0; + /* let previous ips work finish to ensure we don't leave ips twice */ + cancel_work_sync(&rtwdev->ips_work); + mutex_lock(&rtwdev->mutex); rtw_leave_lps_deep(rtwdev); @@ -614,7 +617,7 @@ static void rtw_ops_sw_scan_complete(struct ieee80211_hw *hw, struct rtw_dev *rtwdev = hw->priv; mutex_lock(&rtwdev->mutex); - rtw_core_scan_complete(rtwdev, vif); + rtw_core_scan_complete(rtwdev, vif, false); mutex_unlock(&rtwdev->mutex); } diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index 38252113c4a87..39c223a2e3e2d 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -272,6 +272,15 @@ static void rtw_c2h_work(struct work_struct *work) } } +static void rtw_ips_work(struct work_struct *work) +{ + struct rtw_dev *rtwdev = container_of(work, struct rtw_dev, ips_work); + + mutex_lock(&rtwdev->mutex); + 
rtw_enter_ips(rtwdev); + mutex_unlock(&rtwdev->mutex); +} + static u8 rtw_acquire_macid(struct rtw_dev *rtwdev) { unsigned long mac_id; @@ -1339,7 +1348,8 @@ void rtw_core_scan_start(struct rtw_dev *rtwdev, struct rtw_vif *rtwvif, set_bit(RTW_FLAG_SCANNING, rtwdev->flags); } -void rtw_core_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif) +void rtw_core_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif, + bool hw_scan) { struct rtw_vif *rtwvif = (struct rtw_vif *)vif->drv_priv; u32 config = 0; @@ -1354,6 +1364,9 @@ void rtw_core_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif) rtw_vif_port_config(rtwdev, rtwvif, config); rtw_coex_scan_notify(rtwdev, COEX_SCAN_FINISH); + + if (rtwvif->net_type == RTW_NET_NO_LINK && hw_scan) + ieee80211_queue_work(rtwdev->hw, &rtwdev->ips_work); } int rtw_core_start(struct rtw_dev *rtwdev) @@ -1919,6 +1932,7 @@ int rtw_core_init(struct rtw_dev *rtwdev) INIT_DELAYED_WORK(&coex->wl_ccklock_work, rtw_coex_wl_ccklock_work); INIT_WORK(&rtwdev->tx_work, rtw_tx_work); INIT_WORK(&rtwdev->c2h_work, rtw_c2h_work); + INIT_WORK(&rtwdev->ips_work, rtw_ips_work); INIT_WORK(&rtwdev->fw_recovery_work, rtw_fw_recovery_work); INIT_WORK(&rtwdev->ba_work, rtw_txq_ba_work); skb_queue_head_init(&rtwdev->c2h_queue); diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h index dc1cd9bd4b8a3..36e1e408933db 100644 --- a/drivers/net/wireless/realtek/rtw88/main.h +++ b/drivers/net/wireless/realtek/rtw88/main.h @@ -1960,6 +1960,7 @@ struct rtw_dev { /* c2h cmd queue & handler work */ struct sk_buff_head c2h_queue; struct work_struct c2h_work; + struct work_struct ips_work; struct work_struct fw_recovery_work; /* used to protect txqs list */ @@ -2101,7 +2102,8 @@ void rtw_tx_report_purge_timer(struct timer_list *t); void rtw_update_sta_info(struct rtw_dev *rtwdev, struct rtw_sta_info *si); void rtw_core_scan_start(struct rtw_dev *rtwdev, struct rtw_vif *rtwvif, const u8 *mac_addr, bool hw_scan); -void rtw_core_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif); +void rtw_core_scan_complete(struct rtw_dev *rtwdev, struct ieee80211_vif *vif, + bool hw_scan); int rtw_core_start(struct rtw_dev *rtwdev); void rtw_core_stop(struct rtw_dev *rtwdev); int rtw_chip_info_setup(struct rtw_dev *rtwdev); diff --git a/drivers/net/wwan/qcom_bam_dmux.c b/drivers/net/wwan/qcom_bam_dmux.c index 5dfa2eba6014c..17d46f4d29139 100644 --- a/drivers/net/wwan/qcom_bam_dmux.c +++ b/drivers/net/wwan/qcom_bam_dmux.c @@ -755,7 +755,7 @@ static int __maybe_unused bam_dmux_runtime_resume(struct device *dev) return 0; dmux->tx = dma_request_chan(dev, "tx"); - if (IS_ERR(dmux->rx)) { + if (IS_ERR(dmux->tx)) { dev_err(dev, "Failed to request TX DMA channel: %pe\n", dmux->tx); dmux->tx = NULL; bam_dmux_runtime_suspend(dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 9ccf3d6087993..70ad891a76bae 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1025,6 +1025,9 @@ static unsigned long default_align(struct nd_region *nd_region) } } + if (nd_region->ndr_size < MEMREMAP_COMPAT_ALIGN_MAX) + align = PAGE_SIZE; + mappings = max_t(u16, 1, nd_region->ndr_mappings); div_u64_rem(align, mappings, &remainder); if (remainder) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fd4720d37cc0b..6215d50ed3e7d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1683,13 +1683,6 @@ static void nvme_config_discard(struct gendisk *disk, 
struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } -static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) -{ - return !uuid_is_null(&ids->uuid) || - memchr_inv(ids->nguid, 0, sizeof(ids->nguid)) || - memchr_inv(ids->eui64, 0, sizeof(ids->eui64)); -} - static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) { return uuid_equal(&a->uuid, &b->uuid) && @@ -1864,9 +1857,6 @@ static void nvme_update_disk_info(struct gendisk *disk, nvme_config_discard(disk, ns); blk_queue_max_write_zeroes_sectors(disk->queue, ns->ctrl->max_zeroes_sectors); - - set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) || - test_bit(NVME_NS_FORCE_RO, &ns->flags)); } static inline bool nvme_first_scan(struct gendisk *disk) @@ -1925,6 +1915,8 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) goto out_unfreeze; } + set_disk_ro(ns->disk, (id->nsattr & NVME_NS_ATTR_RO) || + test_bit(NVME_NS_FORCE_RO, &ns->flags)); set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); @@ -1937,6 +1929,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (nvme_ns_head_multipath(ns->head)) { blk_mq_freeze_queue(ns->head->disk->queue); nvme_update_disk_info(ns->head->disk, ns, id); + set_disk_ro(ns->head->disk, + (id->nsattr & NVME_NS_ATTR_RO) || + test_bit(NVME_NS_FORCE_RO, &ns->flags)); nvme_mpath_revalidate_paths(ns); blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); @@ -3581,15 +3576,20 @@ static const struct attribute_group *nvme_dev_attr_groups[] = { NULL, }; -static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, +static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_head *h; - lockdep_assert_held(&subsys->lock); + lockdep_assert_held(&ctrl->subsys->lock); - list_for_each_entry(h, &subsys->nsheads, entry) { - if (h->ns_id != nsid) + list_for_each_entry(h, &ctrl->subsys->nsheads, entry) { + /* + * Private namespaces can share NSIDs under some conditions. + * In that case we can't use the same ns_head for namespaces + * with the same NSID. 
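+		 * nvme_is_unique_nsid() spells out those conditions.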
+ */ + if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h)) continue; if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) return h; @@ -3598,16 +3598,24 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, return NULL; } -static int __nvme_check_ids(struct nvme_subsystem *subsys, - struct nvme_ns_head *new) +static int nvme_subsys_check_duplicate_ids(struct nvme_subsystem *subsys, + struct nvme_ns_ids *ids) { + bool has_uuid = !uuid_is_null(&ids->uuid); + bool has_nguid = memchr_inv(ids->nguid, 0, sizeof(ids->nguid)); + bool has_eui64 = memchr_inv(ids->eui64, 0, sizeof(ids->eui64)); struct nvme_ns_head *h; lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) { - if (nvme_ns_ids_valid(&new->ids) && - nvme_ns_ids_equal(&new->ids, &h->ids)) + if (has_uuid && uuid_equal(&ids->uuid, &h->ids.uuid)) + return -EINVAL; + if (has_nguid && + memcmp(&ids->nguid, &h->ids.nguid, sizeof(ids->nguid)) == 0) + return -EINVAL; + if (has_eui64 && + memcmp(&ids->eui64, &h->ids.eui64, sizeof(ids->eui64)) == 0) return -EINVAL; } @@ -3706,7 +3714,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->ids = *ids; kref_init(&head->ref); - ret = __nvme_check_ids(ctrl->subsys, head); + ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, &head->ids); if (ret) { dev_err(ctrl->device, "duplicate IDs for nsid %d\n", nsid); @@ -3749,7 +3757,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, int ret = 0; mutex_lock(&ctrl->subsys->lock); - head = nvme_find_ns_head(ctrl->subsys, nsid); + head = nvme_find_ns_head(ctrl, nsid); if (!head) { head = nvme_alloc_ns_head(ctrl, nsid, ids); if (IS_ERR(head)) { diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index ff775235534cf..a703f1f5fb64c 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -504,10 +504,11 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) /* * Add a multipath node if the subsystems supports multiple controllers. - * We also do this for private namespaces as the namespace sharing data could - * change after a rescan. + * We also do this for private namespaces as the namespace sharing flag + * could change after a rescan. */ - if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath) + if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || + !nvme_is_unique_nsid(ctrl, head) || !multipath) return 0; head->disk = blk_alloc_disk(ctrl->numa_node); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a162f6c6da6e1..730cc80d84ff7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -716,6 +716,25 @@ static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, return queue_live; return __nvme_check_ready(ctrl, rq, queue_live); } + +/* + * NSID shall be unique for all shared namespaces, or if at least one of the + * following conditions is met: + * 1. Namespace Management is supported by the controller + * 2. ANA is supported by the controller + * 3. NVM Sets are supported by the controller + * + * Otherwise, private namespaces are not required to report a unique NSID.
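+ * (These conditions correspond to the NVME_CTRL_OACS_NS_MNGT_SUPP, NVME_CTRL_CMIC_ANA and NVME_CTRL_CTRATT_NVM_SETS checks in the helper below.)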
+ */ +static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl, + struct nvme_ns_head *head) +{ + return head->shared || + (ctrl->oacs & NVME_CTRL_OACS_NS_MNGT_SUPP) || + (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) || + (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS); +} + int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 65e00c64a588b..d66e2de044e0a 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -30,6 +30,44 @@ static int so_priority; module_param(so_priority, int, 0644); MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* lockdep can detect a circular dependency of the form + * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock + * because dependencies are tracked for both nvme-tcp and user contexts. Using + * a separate class prevents lockdep from conflating nvme-tcp socket use with + * user-space socket API use. + */ +static struct lock_class_key nvme_tcp_sk_key[2]; +static struct lock_class_key nvme_tcp_slock_key[2]; + +static void nvme_tcp_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) + return; + + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME", + &nvme_tcp_slock_key[0], + "sk_lock-AF_INET-NVME", + &nvme_tcp_sk_key[0]); + break; + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME", + &nvme_tcp_slock_key[1], + "sk_lock-AF_INET6-NVME", + &nvme_tcp_sk_key[1]); + break; + default: + WARN_ON_ONCE(1); + } +} +#else +static void nvme_tcp_reclassify_socket(struct socket *sock) { } +#endif + enum nvme_tcp_send_state { NVME_TCP_SEND_CMD_PDU = 0, NVME_TCP_SEND_H2C_PDU, @@ -1469,6 +1507,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, goto err_destroy_mutex; } + nvme_tcp_reclassify_socket(queue->sock); + /* Single syn retry */ tcp_sock_set_syncnt(queue->sock->sk, 1); diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 0d9f6b21babb1..708c7529647fd 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -159,9 +159,12 @@ int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn, * write happen to have any RW1C (write-one-to-clear) bits set, we * just inadvertently cleared something we shouldn't have. 
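+ * For example, a 1-byte write that shares a dword with a latched RW1C status bit turns the read-modify-write below into an unintended clear of that bit.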
*/ - dev_warn_ratelimited(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", - size, pci_domain_nr(bus), bus->number, - PCI_SLOT(devfn), PCI_FUNC(devfn), where); + if (!bus->unsafe_warn) { + dev_warn(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", + size, pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where); + bus->unsafe_warn = 1; + } mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8)); tmp = readl(addr) & mask; diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 6974bd5aa1165..343fe1429e3c2 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -453,10 +453,6 @@ static int imx6_pcie_enable_ref_clk(struct imx6_pcie *imx6_pcie) case IMX7D: break; case IMX8MM: - ret = clk_prepare_enable(imx6_pcie->pcie_aux); - if (ret) - dev_err(dev, "unable to enable pcie_aux clock\n"); - break; case IMX8MQ: ret = clk_prepare_enable(imx6_pcie->pcie_aux); if (ret) { @@ -809,9 +805,7 @@ static int imx6_pcie_start_link(struct dw_pcie *pci) /* Start LTSSM. */ imx6_pcie_ltssm_enable(dev); - ret = dw_pcie_wait_for_link(pci); - if (ret) - goto err_reset_phy; + dw_pcie_wait_for_link(pci); if (pci->link_gen == 2) { /* Allow Gen2 mode after the link is up. */ @@ -847,11 +841,7 @@ static int imx6_pcie_start_link(struct dw_pcie *pci) } /* Make sure link training is finished as well! */ - ret = dw_pcie_wait_for_link(pci); - if (ret) { - dev_err(dev, "Failed to bring link up!\n"); - goto err_reset_phy; - } + dw_pcie_wait_for_link(pci); } else { dev_info(dev, "Link: Gen2 disabled\n"); } @@ -983,6 +973,7 @@ static int imx6_pcie_suspend_noirq(struct device *dev) case IMX8MM: if (phy_power_off(imx6_pcie->phy)) dev_err(dev, "unable to power off PHY\n"); + phy_exit(imx6_pcie->phy); break; default: break; diff --git a/drivers/pci/controller/dwc/pcie-fu740.c b/drivers/pci/controller/dwc/pcie-fu740.c index 00cde9a248b5a..78d002be4f821 100644 --- a/drivers/pci/controller/dwc/pcie-fu740.c +++ b/drivers/pci/controller/dwc/pcie-fu740.c @@ -181,10 +181,59 @@ static int fu740_pcie_start_link(struct dw_pcie *pci) { struct device *dev = pci->dev; struct fu740_pcie *afp = dev_get_drvdata(dev); + u8 cap_exp = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + int ret; + u32 orig, tmp; + + /* + * Force 2.5GT/s when starting the link, due to some devices not + * probing at higher speeds. This happens with the PCIe switch + * on the Unmatched board when U-Boot has not initialised the PCIe. + * The fix in U-Boot is to force 2.5GT/s, which then gets cleared + * by the soft reset done by this driver. 
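+ * (The Supported Link Speeds field of PCI_EXP_LNKCAP is normally read-only; the writes below take effect only while dw_pcie_dbi_ro_wr_en() is active.)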
+ */ + dev_dbg(dev, "cap_exp at %x\n", cap_exp); + dw_pcie_dbi_ro_wr_en(pci); + + tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP); + orig = tmp & PCI_EXP_LNKCAP_SLS; + tmp &= ~PCI_EXP_LNKCAP_SLS; + tmp |= PCI_EXP_LNKCAP_SLS_2_5GB; + dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp); /* Enable LTSSM */ writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_APP_LTSSM_ENABLE); - return 0; + + ret = dw_pcie_wait_for_link(pci); + if (ret) { + dev_err(dev, "error: link did not start\n"); + goto err; + } + + tmp = dw_pcie_readl_dbi(pci, cap_exp + PCI_EXP_LNKCAP); + if ((tmp & PCI_EXP_LNKCAP_SLS) != orig) { + dev_dbg(dev, "changing speed back to original\n"); + + tmp &= ~PCI_EXP_LNKCAP_SLS; + tmp |= orig; + dw_pcie_writel_dbi(pci, cap_exp + PCI_EXP_LNKCAP, tmp); + + tmp = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL); + tmp |= PORT_LOGIC_SPEED_CHANGE; + dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, tmp); + + ret = dw_pcie_wait_for_link(pci); + if (ret) { + dev_err(dev, "error: link did not start at new speed\n"); + goto err; + } + } + + ret = 0; +err: + WARN_ON(ret); /* we assume that errors will be very rare */ + dw_pcie_dbi_ro_wr_dis(pci); + return ret; } static int fu740_pcie_host_init(struct pcie_port *pp) diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index 4f5b44827d213..82e2c618d532d 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -846,7 +846,9 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge, case PCI_EXP_RTSTA: { u32 isr0 = advk_readl(pcie, PCIE_ISR0_REG); u32 msglog = advk_readl(pcie, PCIE_MSG_LOG_REG); - *value = (isr0 & PCIE_MSG_PM_PME_MASK) << 16 | (msglog >> 16); + *value = msglog >> 16; + if (isr0 & PCIE_MSG_PM_PME_MASK) + *value |= PCI_EXP_RTSTA_PME; return PCI_BRIDGE_EMUL_HANDLED; } @@ -1388,7 +1390,6 @@ static void advk_pcie_remove_irq_domain(struct advk_pcie *pcie) static void advk_pcie_handle_msi(struct advk_pcie *pcie) { u32 msi_val, msi_mask, msi_status, msi_idx; - u16 msi_data; msi_mask = advk_readl(pcie, PCIE_MSI_MASK_REG); msi_val = advk_readl(pcie, PCIE_MSI_STATUS_REG); @@ -1398,13 +1399,9 @@ static void advk_pcie_handle_msi(struct advk_pcie *pcie) if (!(BIT(msi_idx) & msi_status)) continue; - /* - * msi_idx contains bits [4:0] of the msi_data and msi_data - * contains 16bit MSI interrupt number - */ advk_writel(pcie, BIT(msi_idx), PCIE_MSI_STATUS_REG); - msi_data = advk_readl(pcie, PCIE_MSI_PAYLOAD_REG) & PCIE_MSI_DATA_MASK; - generic_handle_irq(msi_data); + if (generic_handle_domain_irq(pcie->msi_inner_domain, msi_idx) == -EINVAL) + dev_err_ratelimited(&pcie->pdev->dev, "unexpected MSI 0x%02x\n", msi_idx); } advk_writel(pcie, PCIE_ISR0_MSI_INT_PENDING, diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index 0d5acbfc7143f..7c763d820c52c 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -465,7 +465,7 @@ static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size) return 1; } - if ((size > SZ_1K) && (size < SZ_4G) && !(*ib_reg_mask & (1 << 0))) { + if ((size > SZ_1K) && (size < SZ_1T) && !(*ib_reg_mask & (1 << 0))) { *ib_reg_mask |= (1 << 0); return 0; } @@ -479,28 +479,27 @@ static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size) } static void xgene_pcie_setup_ib_reg(struct xgene_pcie *port, - struct resource_entry *entry, - u8 *ib_reg_mask) + struct of_pci_range *range, u8 *ib_reg_mask) { void __iomem *cfg_base = port->cfg_base; struct device *dev = 
port->dev; void __iomem *bar_addr; u32 pim_reg; - u64 cpu_addr = entry->res->start; - u64 pci_addr = cpu_addr - entry->offset; - u64 size = resource_size(entry->res); + u64 cpu_addr = range->cpu_addr; + u64 pci_addr = range->pci_addr; + u64 size = range->size; u64 mask = ~(size - 1) | EN_REG; u32 flags = PCI_BASE_ADDRESS_MEM_TYPE_64; u32 bar_low; int region; - region = xgene_pcie_select_ib_reg(ib_reg_mask, size); + region = xgene_pcie_select_ib_reg(ib_reg_mask, range->size); if (region < 0) { dev_warn(dev, "invalid pcie dma-range config\n"); return; } - if (entry->res->flags & IORESOURCE_PREFETCH) + if (range->flags & IORESOURCE_PREFETCH) flags |= PCI_BASE_ADDRESS_MEM_PREFETCH; bar_low = pcie_bar_low_val((u32)cpu_addr, flags); @@ -531,13 +530,25 @@ static void xgene_pcie_setup_ib_reg(struct xgene_pcie *port, static int xgene_pcie_parse_map_dma_ranges(struct xgene_pcie *port) { - struct pci_host_bridge *bridge = pci_host_bridge_from_priv(port); - struct resource_entry *entry; + struct device_node *np = port->node; + struct of_pci_range range; + struct of_pci_range_parser parser; + struct device *dev = port->dev; u8 ib_reg_mask = 0; - resource_list_for_each_entry(entry, &bridge->dma_ranges) - xgene_pcie_setup_ib_reg(port, entry, &ib_reg_mask); + if (of_pci_dma_range_parser_init(&parser, np)) { + dev_err(dev, "missing dma-ranges property\n"); + return -EINVAL; + } + + /* Get the dma-ranges from DT */ + for_each_of_pci_range(&parser, &range) { + u64 end = range.cpu_addr + range.size - 1; + dev_dbg(dev, "0x%08x 0x%016llx..0x%016llx -> 0x%016llx\n", + range.flags, range.cpu_addr, end, range.pci_addr); + xgene_pcie_setup_ib_reg(port, &range, &ib_reg_mask); + } return 0; } diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 1c1ebf3dad43c..85dce560831a8 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -98,6 +98,8 @@ static int pcie_poll_cmd(struct controller *ctrl, int timeout) if (slot_status & PCI_EXP_SLTSTA_CC) { pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_CC); + ctrl->cmd_busy = 0; + smp_mb(); return 1; } msleep(10); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 65f7f6b0576c6..da829274fc66d 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1811,6 +1811,18 @@ static void quirk_alder_ioapic(struct pci_dev *pdev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EESSC, quirk_alder_ioapic); #endif +static void quirk_no_msi(struct pci_dev *dev) +{ + pci_info(dev, "avoiding MSI to work around a hardware defect\n"); + dev->no_msi = 1; +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4386, quirk_no_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4387, quirk_no_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4388, quirk_no_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4389, quirk_no_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x438a, quirk_no_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x438b, quirk_no_msi); + static void quirk_pcie_mch(struct pci_dev *pdev) { pdev->no_msi = 1; diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index e1a0c44bc6864..7d6ffdf44a415 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -141,7 +141,7 @@ config ARM_DMC620_PMU config MARVELL_CN10K_TAD_PMU tristate "Marvell CN10K LLC-TAD PMU" - depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) help Provides support for Last-Level cache Tag-and-data Units (LLC-TAD) performance monitors on CN10K family 
silicons. diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 0e48adce57ef3..71448229bc5e9 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -71,9 +71,11 @@ #define CMN_DTM_WPn(n) (0x1A0 + (n) * 0x18) #define CMN_DTM_WPn_CONFIG(n) (CMN_DTM_WPn(n) + 0x00) #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL2 GENMASK_ULL(18,17) -#define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(6) -#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(5) -#define CMN_DTM_WPn_CONFIG_WP_GRP BIT(4) +#define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(9) +#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(8) +#define CMN600_WPn_CONFIG_WP_COMBINE BIT(6) +#define CMN600_WPn_CONFIG_WP_EXCLUSIVE BIT(5) +#define CMN_DTM_WPn_CONFIG_WP_GRP GENMASK_ULL(5, 4) #define CMN_DTM_WPn_CONFIG_WP_CHN_SEL GENMASK_ULL(3, 1) #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL BIT(0) #define CMN_DTM_WPn_VAL(n) (CMN_DTM_WPn(n) + 0x08) @@ -155,6 +157,7 @@ #define CMN_CONFIG_WP_COMBINE GENMASK_ULL(27, 24) #define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48) #define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51) +/* Note that we don't yet support the tertiary match group on newer IPs */ #define CMN_CONFIG_WP_GRP BIT_ULL(56) #define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57) #define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0) @@ -595,6 +598,9 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3))) return 0; + if (chan == 4 && cmn->model == CMN600) + return 0; + if ((chan == 5 && cmn->rsp_vc_num < 2) || (chan == 6 && cmn->dat_vc_num < 2)) return 0; @@ -905,15 +911,18 @@ static u32 arm_cmn_wp_config(struct perf_event *event) u32 grp = CMN_EVENT_WP_GRP(event); u32 exc = CMN_EVENT_WP_EXCLUSIVE(event); u32 combine = CMN_EVENT_WP_COMBINE(event); + bool is_cmn600 = to_cmn(event->pmu)->model == CMN600; config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) | - FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1); + if (exc) + config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE : + CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE; if (combine && !grp) - config |= CMN_DTM_WPn_CONFIG_WP_COMBINE; - + config |= is_cmn600 ? 
CMN600_WPn_CONFIG_WP_COMBINE : + CMN_DTM_WPn_CONFIG_WP_COMBINE; return config; } diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.c b/drivers/phy/broadcom/phy-brcm-usb-init.c index 9391ab42a12b3..dd0f66288fbdd 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.c +++ b/drivers/phy/broadcom/phy-brcm-usb-init.c @@ -79,6 +79,7 @@ enum brcm_family_type { BRCM_FAMILY_3390A0, + BRCM_FAMILY_4908, BRCM_FAMILY_7250B0, BRCM_FAMILY_7271A0, BRCM_FAMILY_7364A0, @@ -96,6 +97,7 @@ enum brcm_family_type { static const char *family_names[BRCM_FAMILY_COUNT] = { USB_BRCM_FAMILY(3390A0), + USB_BRCM_FAMILY(4908), USB_BRCM_FAMILY(7250B0), USB_BRCM_FAMILY(7271A0), USB_BRCM_FAMILY(7364A0), @@ -203,6 +205,27 @@ usb_reg_bits_map_table[BRCM_FAMILY_COUNT][USB_CTRL_SELECTOR_COUNT] = { USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK, ENDIAN_SETTINGS, /* USB_CTRL_SETUP ENDIAN bits */ }, + /* 4908 */ + [BRCM_FAMILY_4908] = { + 0, /* USB_CTRL_SETUP_SCB1_EN_MASK */ + 0, /* USB_CTRL_SETUP_SCB2_EN_MASK */ + 0, /* USB_CTRL_SETUP_SS_EHCI64BIT_EN_MASK */ + 0, /* USB_CTRL_SETUP_STRAP_IPP_SEL_MASK */ + 0, /* USB_CTRL_SETUP_OC3_DISABLE_MASK */ + 0, /* USB_CTRL_PLL_CTL_PLL_IDDQ_PWRDN_MASK */ + 0, /* USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK */ + USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK, + USB_CTRL_USB_PM_USB_PWRDN_MASK, + 0, /* USB_CTRL_USB30_CTL1_XHC_SOFT_RESETB_MASK */ + 0, /* USB_CTRL_USB30_CTL1_USB3_IOC_MASK */ + 0, /* USB_CTRL_USB30_CTL1_USB3_IPP_MASK */ + 0, /* USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK */ + 0, /* USB_CTRL_USB_PM_SOFT_RESET_MASK */ + 0, /* USB_CTRL_SETUP_CC_DRD_MODE_ENABLE_MASK */ + 0, /* USB_CTRL_SETUP_STRAP_CC_DRD_MODE_ENABLE_SEL_MASK */ + 0, /* USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK */ + 0, /* USB_CTRL_SETUP ENDIAN bits */ + }, /* 7250b0 */ [BRCM_FAMILY_7250B0] = { USB_CTRL_SETUP_SCB1_EN_MASK, @@ -559,6 +582,7 @@ static void brcmusb_usb3_pll_54mhz(struct brcm_usb_init_params *params) */ switch (params->selected_family) { case BRCM_FAMILY_3390A0: + case BRCM_FAMILY_4908: case BRCM_FAMILY_7250B0: case BRCM_FAMILY_7366C0: case BRCM_FAMILY_74371A0: @@ -1004,6 +1028,18 @@ static const struct brcm_usb_init_ops bcm7445_ops = { .set_dual_select = usb_set_dual_select, }; +void brcm_usb_dvr_init_4908(struct brcm_usb_init_params *params) +{ + int fam; + + fam = BRCM_FAMILY_4908; + params->selected_family = fam; + params->usb_reg_bits_map = + &usb_reg_bits_map_table[fam][0]; + params->family_name = family_names[fam]; + params->ops = &bcm7445_ops; +} + void brcm_usb_dvr_init_7445(struct brcm_usb_init_params *params) { int fam; diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.h b/drivers/phy/broadcom/phy-brcm-usb-init.h index a39f30fa2e991..1ccb5ddab865c 100644 --- a/drivers/phy/broadcom/phy-brcm-usb-init.h +++ b/drivers/phy/broadcom/phy-brcm-usb-init.h @@ -64,6 +64,7 @@ struct brcm_usb_init_params { bool suspend_with_clocks; }; +void brcm_usb_dvr_init_4908(struct brcm_usb_init_params *params); void brcm_usb_dvr_init_7445(struct brcm_usb_init_params *params); void brcm_usb_dvr_init_7216(struct brcm_usb_init_params *params); void brcm_usb_dvr_init_7211b0(struct brcm_usb_init_params *params); diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index 0f1deb6e0eabf..2cb3779fcdf82 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -283,6 +283,15 @@ static const struct attribute_group brcm_usb_phy_group = { .attrs = brcm_usb_phy_attrs, }; +static const struct match_chip_info chip_info_4908 = { + .init_func = &brcm_usb_dvr_init_4908, + .required_regs = { + 
BRCM_REGS_CTRL, + BRCM_REGS_XHCI_EC, + -1, + }, +}; + static const struct match_chip_info chip_info_7216 = { .init_func = &brcm_usb_dvr_init_7216, .required_regs = { @@ -318,7 +327,7 @@ static const struct match_chip_info chip_info_7445 = { static const struct of_device_id brcm_usb_dt_ids[] = { { .compatible = "brcm,bcm4908-usb-phy", - .data = &chip_info_7445, + .data = &chip_info_4908, }, { .compatible = "brcm,bcm7216-usb-phy", diff --git a/drivers/phy/phy-core-mipi-dphy.c b/drivers/phy/phy-core-mipi-dphy.c index ccb4045685cdd..929e86d6558e0 100644 --- a/drivers/phy/phy-core-mipi-dphy.c +++ b/drivers/phy/phy-core-mipi-dphy.c @@ -64,10 +64,10 @@ int phy_mipi_dphy_get_default_config(unsigned long pixel_clock, cfg->hs_trail = max(4 * 8 * ui, 60000 + 4 * 4 * ui); cfg->init = 100; - cfg->lpx = 60000; + cfg->lpx = 50000; cfg->ta_get = 5 * cfg->lpx; cfg->ta_go = 4 * cfg->lpx; - cfg->ta_sure = 2 * cfg->lpx; + cfg->ta_sure = cfg->lpx; cfg->wakeup = 1000; cfg->hs_clk_rate = hs_clk_rate; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 5f7c421ab6e76..334cb85855a93 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1038,6 +1038,7 @@ int mtk_pctrl_init(struct platform_device *pdev, node = of_parse_phandle(np, "mediatek,pctl-regmap", 0); if (node) { pctl->regmap1 = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(pctl->regmap1)) return PTR_ERR(pctl->regmap1); } else if (regmap) { @@ -1051,6 +1052,7 @@ int mtk_pctrl_init(struct platform_device *pdev, node = of_parse_phandle(np, "mediatek,pctl-regmap", 1); if (node) { pctl->regmap2 = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(pctl->regmap2)) return PTR_ERR(pctl->regmap2); } diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index f9f9110f2107d..fe6cf068c4f41 100644 --- a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -96,20 +96,16 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev, err = hw->soc->bias_get_combo(hw, desc, &pullup, &ret); if (err) goto out; + if (ret == MTK_PUPD_SET_R1R0_00) + ret = MTK_DISABLE; if (param == PIN_CONFIG_BIAS_DISABLE) { - if (ret == MTK_PUPD_SET_R1R0_00) - ret = MTK_DISABLE; + if (ret != MTK_DISABLE) + err = -EINVAL; } else if (param == PIN_CONFIG_BIAS_PULL_UP) { - /* When desire to get pull-up value, return - * error if current setting is pull-down - */ - if (!pullup) + if (!pullup || ret == MTK_DISABLE) err = -EINVAL; } else if (param == PIN_CONFIG_BIAS_PULL_DOWN) { - /* When desire to get pull-down value, return - * error if current setting is pull-up - */ - if (pullup) + if (pullup || ret == MTK_DISABLE) err = -EINVAL; } } else { @@ -188,8 +184,7 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev, } static int mtk_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, - enum pin_config_param param, - enum pin_config_param arg) + enum pin_config_param param, u32 arg) { struct mtk_pinctrl *hw = pinctrl_dev_get_drvdata(pctldev); const struct mtk_pin_desc *desc; @@ -586,6 +581,9 @@ ssize_t mtk_pctrl_show_one_pin(struct mtk_pinctrl *hw, if (gpio >= hw->soc->npins) return -EINVAL; + if (mtk_is_virt_gpio(hw, gpio)) + return -EINVAL; + desc = (const struct mtk_pin_desc *)&hw->soc->pins[gpio]; pinmux = mtk_pctrl_get_pinmux(hw, gpio); if (pinmux >= hw->soc->nfuncs) @@ -737,10 +735,10 @@ static int mtk_pconf_group_get(struct pinctrl_dev *pctldev, unsigned group, unsigned 
long *config) { struct mtk_pinctrl *hw = pinctrl_dev_get_drvdata(pctldev); + struct mtk_pinctrl_group *grp = &hw->groups[group]; - *config = hw->groups[group].config; - - return 0; + /* One pin per group only */ + return mtk_pinconf_get(pctldev, grp->pin, config); } static int mtk_pconf_group_set(struct pinctrl_dev *pctldev, unsigned group, @@ -756,8 +754,6 @@ static int mtk_pconf_group_set(struct pinctrl_dev *pctldev, unsigned group, pinconf_to_config_argument(configs[i])); if (ret < 0) return ret; - - grp->config = configs[i]; } return 0; @@ -988,7 +984,7 @@ int mtk_paris_pinctrl_probe(struct platform_device *pdev, hw->nbase = hw->soc->nbase_names; if (of_find_property(hw->dev->of_node, - "mediatek,rsel_resistance_in_si_unit", NULL)) + "mediatek,rsel-resistance-in-si-unit", NULL)) hw->rsel_si_unit = true; else hw->rsel_si_unit = false; diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index 39828e9c3120a..4757bf964d3cd 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -1883,8 +1883,10 @@ static int nmk_pinctrl_probe(struct platform_device *pdev) } prcm_np = of_parse_phandle(np, "prcm", 0); - if (prcm_np) + if (prcm_np) { npct->prcm_base = of_iomap(prcm_np, 0); + of_node_put(prcm_np); + } if (!npct->prcm_base) { if (version == PINCTRL_NMK_STN8815) { dev_info(&pdev->dev, diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c index 4d81908d6725d..41136f63014a4 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c @@ -78,7 +78,6 @@ struct npcm7xx_gpio { struct gpio_chip gc; int irqbase; int irq; - void *priv; struct irq_chip irq_chip; u32 pinctrl_id; int (*direction_input)(struct gpio_chip *chip, unsigned offset); @@ -226,7 +225,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc) chained_irq_enter(chip, desc); sts = ioread32(bank->base + NPCM7XX_GP_N_EVST); en = ioread32(bank->base + NPCM7XX_GP_N_EVEN); - dev_dbg(chip->parent_device, "==> got irq sts %.8x %.8x\n", sts, + dev_dbg(bank->gc.parent, "==> got irq sts %.8x %.8x\n", sts, en); sts &= en; @@ -241,33 +240,33 @@ static int npcmgpio_set_irq_type(struct irq_data *d, unsigned int type) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = BIT(d->hwirq); - dev_dbg(d->chip->parent_device, "setirqtype: %u.%u = %u\n", gpio, + dev_dbg(bank->gc.parent, "setirqtype: %u.%u = %u\n", gpio, d->irq, type); switch (type) { case IRQ_TYPE_EDGE_RISING: - dev_dbg(d->chip->parent_device, "edge.rising\n"); + dev_dbg(bank->gc.parent, "edge.rising\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_FALLING: - dev_dbg(d->chip->parent_device, "edge.falling\n"); + dev_dbg(bank->gc.parent, "edge.falling\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_BOTH: - dev_dbg(d->chip->parent_device, "edge.both\n"); + dev_dbg(bank->gc.parent, "edge.both\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); break; case IRQ_TYPE_LEVEL_LOW: - dev_dbg(d->chip->parent_device, "level.low\n"); + dev_dbg(bank->gc.parent, "level.low\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_LEVEL_HIGH: - dev_dbg(d->chip->parent_device, "level.high\n"); + dev_dbg(bank->gc.parent, "level.high\n"); 
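		/*
		 * Level-high, like edge-rising above, is the
		 * non-inverted case: clear the polarity bit.
		 */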
npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; default: - dev_dbg(d->chip->parent_device, "invalid irq type\n"); + dev_dbg(bank->gc.parent, "invalid irq type\n"); return -EINVAL; } @@ -289,7 +288,7 @@ static void npcmgpio_irq_ack(struct irq_data *d) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = d->hwirq; - dev_dbg(d->chip->parent_device, "irq_ack: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_ack: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVST); } @@ -301,7 +300,7 @@ static void npcmgpio_irq_mask(struct irq_data *d) unsigned int gpio = d->hwirq; /* Clear events */ - dev_dbg(d->chip->parent_device, "irq_mask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_mask: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENC); } @@ -313,7 +312,7 @@ static void npcmgpio_irq_unmask(struct irq_data *d) unsigned int gpio = d->hwirq; /* Enable events */ - dev_dbg(d->chip->parent_device, "irq_unmask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_unmask: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENS); } @@ -323,7 +322,7 @@ static unsigned int npcmgpio_irq_startup(struct irq_data *d) unsigned int gpio = d->hwirq; /* active-high, input, clear interrupt, enable interrupt */ - dev_dbg(d->chip->parent_device, "startup: %u.%u\n", gpio, d->irq); + dev_dbg(gc->parent, "startup: %u.%u\n", gpio, d->irq); npcmgpio_direction_input(gc, gpio); npcmgpio_irq_ack(d); npcmgpio_irq_unmask(d); @@ -905,7 +904,7 @@ static struct npcm7xx_func npcm7xx_funcs[] = { #define DRIVE_STRENGTH_HI_SHIFT 12 #define DRIVE_STRENGTH_MASK 0x0000FF00 -#define DS(lo, hi) (((lo) << DRIVE_STRENGTH_LO_SHIFT) | \ +#define DSTR(lo, hi) (((lo) << DRIVE_STRENGTH_LO_SHIFT) | \ ((hi) << DRIVE_STRENGTH_HI_SHIFT)) #define DSLO(x) (((x) >> DRIVE_STRENGTH_LO_SHIFT) & 0xF) #define DSHI(x) (((x) >> DRIVE_STRENGTH_HI_SHIFT) & 0xF) @@ -925,31 +924,31 @@ struct npcm7xx_pincfg { static const struct npcm7xx_pincfg pincfg[] = { /* PIN FUNCTION 1 FUNCTION 2 FUNCTION 3 FLAGS */ NPCM7XX_PINCFG(0, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(1, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(2, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(1, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(2, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(3, iox1, MFSEL1, 30, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(4, iox2, MFSEL3, 14, smb1d, I2CSEGSEL, 7, none, NONE, 0, SLEW), NPCM7XX_PINCFG(5, iox2, MFSEL3, 14, smb1d, I2CSEGSEL, 7, none, NONE, 0, SLEW), NPCM7XX_PINCFG(6, iox2, MFSEL3, 14, smb2d, I2CSEGSEL, 10, none, NONE, 0, SLEW), NPCM7XX_PINCFG(7, iox2, MFSEL3, 14, smb2d, I2CSEGSEL, 10, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(8, lkgpo1, FLOCKR1, 4, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(9, lkgpo2, FLOCKR1, 8, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(10, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(11, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(8, lkgpo1, FLOCKR1, 4, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(9, lkgpo2, FLOCKR1, 8, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(10, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(11, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), 
NPCM7XX_PINCFG(12, gspi, MFSEL1, 24, smb5b, I2CSEGSEL, 19, none, NONE, 0, SLEW), NPCM7XX_PINCFG(13, gspi, MFSEL1, 24, smb5b, I2CSEGSEL, 19, none, NONE, 0, SLEW), NPCM7XX_PINCFG(14, gspi, MFSEL1, 24, smb5c, I2CSEGSEL, 20, none, NONE, 0, SLEW), NPCM7XX_PINCFG(15, gspi, MFSEL1, 24, smb5c, I2CSEGSEL, 20, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(16, lkgpo0, FLOCKR1, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(17, pspi2, MFSEL3, 13, smb4den, I2CSEGSEL, 23, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(18, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(19, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(16, lkgpo0, FLOCKR1, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(17, pspi2, MFSEL3, 13, smb4den, I2CSEGSEL, 23, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(18, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(19, pspi2, MFSEL3, 13, smb4b, I2CSEGSEL, 14, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(20, smb4c, I2CSEGSEL, 15, smb15, MFSEL3, 8, none, NONE, 0, 0), NPCM7XX_PINCFG(21, smb4c, I2CSEGSEL, 15, smb15, MFSEL3, 8, none, NONE, 0, 0), NPCM7XX_PINCFG(22, smb4d, I2CSEGSEL, 16, smb14, MFSEL3, 7, none, NONE, 0, 0), NPCM7XX_PINCFG(23, smb4d, I2CSEGSEL, 16, smb14, MFSEL3, 7, none, NONE, 0, 0), - NPCM7XX_PINCFG(24, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(25, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(24, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(25, ioxh, MFSEL3, 18, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(26, smb5, MFSEL1, 2, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(27, smb5, MFSEL1, 2, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(28, smb4, MFSEL1, 1, none, NONE, 0, none, NONE, 0, 0), @@ -965,12 +964,12 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(39, smb3b, I2CSEGSEL, 11, none, NONE, 0, none, NONE, 0, SLEW), NPCM7XX_PINCFG(40, smb3b, I2CSEGSEL, 11, none, NONE, 0, none, NONE, 0, SLEW), NPCM7XX_PINCFG(41, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(42, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, DS(2, 4) | GPO), + NPCM7XX_PINCFG(42, bmcuart0a, MFSEL1, 9, none, NONE, 0, none, NONE, 0, DSTR(2, 4) | GPO), NPCM7XX_PINCFG(43, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, bmcuart1, MFSEL3, 24, 0), NPCM7XX_PINCFG(44, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, bmcuart1, MFSEL3, 24, 0), NPCM7XX_PINCFG(45, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(46, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DS(2, 8)), - NPCM7XX_PINCFG(47, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DS(2, 8)), + NPCM7XX_PINCFG(46, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DSTR(2, 8)), + NPCM7XX_PINCFG(47, uart1, MFSEL1, 10, jtag2, MFSEL4, 0, none, NONE, 0, DSTR(2, 8)), NPCM7XX_PINCFG(48, uart2, MFSEL1, 11, bmcuart0b, MFSEL4, 1, none, NONE, 0, GPO), NPCM7XX_PINCFG(49, uart2, MFSEL1, 11, bmcuart0b, MFSEL4, 1, none, NONE, 0, 0), NPCM7XX_PINCFG(50, uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0), @@ -980,8 +979,8 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(54, uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(55, uart2, MFSEL1, 11, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(56, r1err, MFSEL1, 12, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(57, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DS(2, 4)), - 
NPCM7XX_PINCFG(58, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DS(2, 4)), + NPCM7XX_PINCFG(57, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), + NPCM7XX_PINCFG(58, r1md, MFSEL1, 13, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), NPCM7XX_PINCFG(59, smb3d, I2CSEGSEL, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(60, smb3d, I2CSEGSEL, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(61, uart1, MFSEL1, 10, none, NONE, 0, none, NONE, 0, GPO), @@ -1004,19 +1003,19 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(77, fanin13, MFSEL2, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(78, fanin14, MFSEL2, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(79, fanin15, MFSEL2, 15, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(80, pwm0, MFSEL2, 16, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(81, pwm1, MFSEL2, 17, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(82, pwm2, MFSEL2, 18, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(83, pwm3, MFSEL2, 19, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(84, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(85, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(86, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(80, pwm0, MFSEL2, 16, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(81, pwm1, MFSEL2, 17, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(82, pwm2, MFSEL2, 18, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(83, pwm3, MFSEL2, 19, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(84, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(85, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(86, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(87, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(88, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(89, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(90, r2err, MFSEL1, 15, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(91, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DS(2, 4)), - NPCM7XX_PINCFG(92, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DS(2, 4)), + NPCM7XX_PINCFG(91, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), + NPCM7XX_PINCFG(92, r2md, MFSEL1, 16, none, NONE, 0, none, NONE, 0, DSTR(2, 4)), NPCM7XX_PINCFG(93, ga20kbc, MFSEL1, 17, smb5d, I2CSEGSEL, 21, none, NONE, 0, 0), NPCM7XX_PINCFG(94, ga20kbc, MFSEL1, 17, smb5d, I2CSEGSEL, 21, none, NONE, 0, 0), NPCM7XX_PINCFG(95, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, 0), @@ -1062,34 +1061,34 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(133, smb10, MFSEL4, 13, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(134, smb11, MFSEL4, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(135, smb11, MFSEL4, 14, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(136, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(137, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(138, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(139, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(140, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(136, sd1, MFSEL3, 12, none, NONE, 0, 
none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(137, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(138, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(139, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(140, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(141, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(142, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(142, sd1, MFSEL3, 12, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(143, sd1, MFSEL3, 12, sd1pwr, MFSEL4, 5, none, NONE, 0, 0), - NPCM7XX_PINCFG(144, pwm4, MFSEL2, 20, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(145, pwm5, MFSEL2, 21, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(146, pwm6, MFSEL2, 22, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(147, pwm7, MFSEL2, 23, none, NONE, 0, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(148, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(149, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(150, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(151, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(152, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(144, pwm4, MFSEL2, 20, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(145, pwm5, MFSEL2, 21, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(146, pwm6, MFSEL2, 22, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(147, pwm7, MFSEL2, 23, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(148, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(149, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(150, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(151, mmc8, MFSEL3, 11, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(152, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(153, mmcwp, FLOCKR1, 24, none, NONE, 0, none, NONE, 0, 0), /* Z1/A1 */ - NPCM7XX_PINCFG(154, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(154, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(155, mmccd, MFSEL3, 25, mmcrst, MFSEL4, 6, none, NONE, 0, 0), /* Z1/A1 */ - NPCM7XX_PINCFG(156, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(157, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(158, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(159, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - - NPCM7XX_PINCFG(160, clkout, MFSEL1, 21, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(161, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, DS(8, 12)), - NPCM7XX_PINCFG(162, serirq, NONE, 0, gpio, MFSEL1, 31, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(156, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(157, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(158, mmc, MFSEL3, 10, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(159, mmc, MFSEL3, 10, none, 
NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + + NPCM7XX_PINCFG(160, clkout, MFSEL1, 21, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(161, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, DSTR(8, 12)), + NPCM7XX_PINCFG(162, serirq, NONE, 0, gpio, MFSEL1, 31, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(163, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, 0), NPCM7XX_PINCFG(164, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, SLEWLPC), NPCM7XX_PINCFG(165, lpc, NONE, 0, espi, MFSEL4, 8, gpio, MFSEL1, 26, SLEWLPC), @@ -1102,25 +1101,25 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(172, smb6, MFSEL3, 1, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(173, smb7, MFSEL3, 2, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(174, smb7, MFSEL3, 2, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(175, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(176, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(177, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(178, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(179, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(180, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), + NPCM7XX_PINCFG(175, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(176, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(177, pspi1, MFSEL3, 4, faninx, MFSEL3, 3, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(178, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(179, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(180, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), NPCM7XX_PINCFG(181, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(182, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(183, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(184, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(185, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(186, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(187, spi3cs1, MFSEL4, 17, none, NONE, 0, none, NONE, 0, DS(8, 12)), - NPCM7XX_PINCFG(188, spi3quad, MFSEL4, 20, spi3cs2, MFSEL4, 18, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(189, spi3quad, MFSEL4, 20, spi3cs3, MFSEL4, 19, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(190, gpio, FLOCKR1, 20, nprd_smi, NONE, 0, none, NONE, 0, DS(2, 4)), - NPCM7XX_PINCFG(191, none, NONE, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)), /* XX */ - - NPCM7XX_PINCFG(192, none, NONE, 0, none, NONE, 0, none, NONE, 0, DS(8, 12)), /* XX */ + NPCM7XX_PINCFG(183, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(184, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(185, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(186, spi3, MFSEL4, 16, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(187, spi3cs1, MFSEL4, 17, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), + NPCM7XX_PINCFG(188, spi3quad, MFSEL4, 20, spi3cs2, MFSEL4, 18, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(189, spi3quad, MFSEL4, 20, spi3cs3, MFSEL4, 19, none, NONE, 0, 
DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(190, gpio, FLOCKR1, 20, nprd_smi, NONE, 0, none, NONE, 0, DSTR(2, 4)), + NPCM7XX_PINCFG(191, none, NONE, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), /* XX */ + + NPCM7XX_PINCFG(192, none, NONE, 0, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), /* XX */ NPCM7XX_PINCFG(193, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(194, smb0b, I2CSEGSEL, 0, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(195, smb0b, I2CSEGSEL, 0, none, NONE, 0, none, NONE, 0, 0), @@ -1131,11 +1130,11 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(200, r2, MFSEL1, 14, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(201, r1, MFSEL3, 9, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(202, smb0c, I2CSEGSEL, 1, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(203, faninx, MFSEL3, 3, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(203, faninx, MFSEL3, 3, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(204, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, SLEW), NPCM7XX_PINCFG(205, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(206, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DS(4, 8)), - NPCM7XX_PINCFG(207, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DS(4, 8)), + NPCM7XX_PINCFG(206, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DSTR(4, 8)), + NPCM7XX_PINCFG(207, ddc, NONE, 0, gpio, MFSEL3, 22, none, NONE, 0, DSTR(4, 8)), NPCM7XX_PINCFG(208, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(209, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(210, rg2, MFSEL4, 24, ddr, MFSEL3, 26, none, NONE, 0, 0), @@ -1147,20 +1146,20 @@ static const struct npcm7xx_pincfg pincfg[] = { NPCM7XX_PINCFG(216, rg2mdio, MFSEL4, 23, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(217, rg2mdio, MFSEL4, 23, ddr, MFSEL3, 26, none, NONE, 0, 0), NPCM7XX_PINCFG(218, wdog1, MFSEL3, 19, none, NONE, 0, none, NONE, 0, 0), - NPCM7XX_PINCFG(219, wdog2, MFSEL3, 20, none, NONE, 0, none, NONE, 0, DS(4, 8)), + NPCM7XX_PINCFG(219, wdog2, MFSEL3, 20, none, NONE, 0, none, NONE, 0, DSTR(4, 8)), NPCM7XX_PINCFG(220, smb12, MFSEL3, 5, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(221, smb12, MFSEL3, 5, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(222, smb13, MFSEL3, 6, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(223, smb13, MFSEL3, 6, none, NONE, 0, none, NONE, 0, 0), NPCM7XX_PINCFG(224, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, SLEW), - NPCM7XX_PINCFG(225, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(226, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW | GPO), - NPCM7XX_PINCFG(227, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(228, spixcs1, MFSEL4, 28, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(229, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(230, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DS(8, 12) | SLEW), - NPCM7XX_PINCFG(231, clkreq, MFSEL4, 9, none, NONE, 0, none, NONE, 0, DS(8, 12)), + NPCM7XX_PINCFG(225, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(226, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW | GPO), + NPCM7XX_PINCFG(227, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(228, spixcs1, MFSEL4, 28, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(229, spix, MFSEL4, 27, none, NONE, 0, none, 
NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(230, spix, MFSEL4, 27, none, NONE, 0, none, NONE, 0, DSTR(8, 12) | SLEW), + NPCM7XX_PINCFG(231, clkreq, MFSEL4, 9, none, NONE, 0, none, NONE, 0, DSTR(8, 12)), NPCM7XX_PINCFG(253, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* SDHC1 power */ NPCM7XX_PINCFG(254, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* SDHC2 power */ NPCM7XX_PINCFG(255, none, NONE, 0, none, NONE, 0, none, NONE, 0, GPI), /* DACOSEL */ @@ -1561,7 +1560,7 @@ static int npcm7xx_get_groups_count(struct pinctrl_dev *pctldev) { struct npcm7xx_pinctrl *npcm = pinctrl_dev_get_drvdata(pctldev); - dev_dbg(npcm->dev, "group size: %d\n", ARRAY_SIZE(npcm7xx_groups)); + dev_dbg(npcm->dev, "group size: %zu\n", ARRAY_SIZE(npcm7xx_groups)); return ARRAY_SIZE(npcm7xx_groups); } diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index f8edcc88ac013..415d1df8f46a5 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -30,10 +30,10 @@ static const struct pin_config_item conf_items[] = { PCONFDUMP(PIN_CONFIG_BIAS_BUS_HOLD, "input bias bus hold", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_DISABLE, "input bias disabled", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_HIGH_IMPEDANCE, "input bias high impedance", NULL, false), - PCONFDUMP(PIN_CONFIG_BIAS_PULL_DOWN, "input bias pull down", NULL, false), + PCONFDUMP(PIN_CONFIG_BIAS_PULL_DOWN, "input bias pull down", "ohms", true), PCONFDUMP(PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, - "input bias pull to pin specific state", NULL, false), - PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL, false), + "input bias pull to pin specific state", "ohms", true), + PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", "ohms", true), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false), diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 2712f51eb2381..fa6becca17889 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -119,6 +119,8 @@ struct ingenic_chip_info { unsigned int num_functions; const u32 *pull_ups, *pull_downs; + + const struct regmap_access_table *access_table; }; struct ingenic_pinctrl { @@ -2179,6 +2181,17 @@ static const struct function_desc x1000_functions[] = { { "mac", x1000_mac_groups, ARRAY_SIZE(x1000_mac_groups), }, }; +static const struct regmap_range x1000_access_ranges[] = { + regmap_reg_range(0x000, 0x400 - 4), + regmap_reg_range(0x700, 0x800 - 4), +}; + +/* shared with X1500 */ +static const struct regmap_access_table x1000_access_table = { + .yes_ranges = x1000_access_ranges, + .n_yes_ranges = ARRAY_SIZE(x1000_access_ranges), +}; + static const struct ingenic_chip_info x1000_chip_info = { .num_chips = 4, .reg_offset = 0x100, @@ -2189,6 +2202,7 @@ static const struct ingenic_chip_info x1000_chip_info = { .num_functions = ARRAY_SIZE(x1000_functions), .pull_ups = x1000_pull_ups, .pull_downs = x1000_pull_downs, + .access_table = &x1000_access_table, }; static int x1500_uart0_data_pins[] = { 0x4a, 0x4b, }; @@ -2300,6 +2314,7 @@ static const struct ingenic_chip_info x1500_chip_info = { .num_functions = ARRAY_SIZE(x1500_functions), .pull_ups = x1000_pull_ups, .pull_downs = x1000_pull_downs, + .access_table = &x1000_access_table, }; static const u32 x1830_pull_ups[4] = { @@ -2506,6 +2521,16 @@ static const 
struct function_desc x1830_functions[] = { { "mac", x1830_mac_groups, ARRAY_SIZE(x1830_mac_groups), }, }; +static const struct regmap_range x1830_access_ranges[] = { + regmap_reg_range(0x0000, 0x4000 - 4), + regmap_reg_range(0x7000, 0x8000 - 4), +}; + +static const struct regmap_access_table x1830_access_table = { + .yes_ranges = x1830_access_ranges, + .n_yes_ranges = ARRAY_SIZE(x1830_access_ranges), +}; + static const struct ingenic_chip_info x1830_chip_info = { .num_chips = 4, .reg_offset = 0x1000, @@ -2516,6 +2541,7 @@ static const struct ingenic_chip_info x1830_chip_info = { .num_functions = ARRAY_SIZE(x1830_functions), .pull_ups = x1830_pull_ups, .pull_downs = x1830_pull_downs, + .access_table = &x1830_access_table, }; static const u32 x2000_pull_ups[5] = { @@ -2969,6 +2995,17 @@ static const struct function_desc x2000_functions[] = { { "otg", x2000_otg_groups, ARRAY_SIZE(x2000_otg_groups), }, }; +static const struct regmap_range x2000_access_ranges[] = { + regmap_reg_range(0x000, 0x500 - 4), + regmap_reg_range(0x700, 0x800 - 4), +}; + +/* shared with X2100 */ +static const struct regmap_access_table x2000_access_table = { + .yes_ranges = x2000_access_ranges, + .n_yes_ranges = ARRAY_SIZE(x2000_access_ranges), +}; + static const struct ingenic_chip_info x2000_chip_info = { .num_chips = 5, .reg_offset = 0x100, @@ -2979,6 +3016,7 @@ static const struct ingenic_chip_info x2000_chip_info = { .num_functions = ARRAY_SIZE(x2000_functions), .pull_ups = x2000_pull_ups, .pull_downs = x2000_pull_downs, + .access_table = &x2000_access_table, }; static const u32 x2100_pull_ups[5] = { @@ -3189,6 +3227,7 @@ static const struct ingenic_chip_info x2100_chip_info = { .num_functions = ARRAY_SIZE(x2100_functions), .pull_ups = x2100_pull_ups, .pull_downs = x2100_pull_downs, + .access_table = &x2000_access_table, }; static u32 ingenic_gpio_read_reg(struct ingenic_gpio_chip *jzgc, u8 reg) @@ -4168,7 +4207,12 @@ static int __init ingenic_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(base); regmap_config = ingenic_pinctrl_regmap_config; - regmap_config.max_register = chip_info->num_chips * chip_info->reg_offset; + if (chip_info->access_table) { + regmap_config.rd_table = chip_info->access_table; + regmap_config.wr_table = chip_info->access_table; + } else { + regmap_config.max_register = chip_info->num_chips * chip_info->reg_offset - 4; + } jzpc->map = devm_regmap_init_mmio(dev, base, ®map_config); if (IS_ERR(jzpc->map)) { diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c index 639f1130e9892..666f1e3889e00 100644 --- a/drivers/pinctrl/pinctrl-microchip-sgpio.c +++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "core.h" #include "pinconf.h" @@ -116,6 +117,7 @@ struct sgpio_priv { u32 clock; struct regmap *regs; const struct sgpio_properties *properties; + spinlock_t lock; }; struct sgpio_port_addr { @@ -229,6 +231,7 @@ static void sgpio_output_set(struct sgpio_priv *priv, int value) { unsigned int bit = SGPIO_SRC_BITS * addr->bit; + unsigned long flags; u32 clr, set; switch (priv->properties->arch) { @@ -247,7 +250,10 @@ static void sgpio_output_set(struct sgpio_priv *priv, default: return; } + + spin_lock_irqsave(&priv->lock, flags); sgpio_clrsetbits(priv, REG_PORT_CONFIG, addr->port, clr, set); + spin_unlock_irqrestore(&priv->lock, flags); } static int sgpio_output_get(struct sgpio_priv *priv, @@ -575,10 +581,13 @@ static void microchip_sgpio_irq_settype(struct irq_data *data, struct 
sgpio_bank *bank = gpiochip_get_data(chip); unsigned int gpio = irqd_to_hwirq(data); struct sgpio_port_addr addr; + unsigned long flags; u32 ena; sgpio_pin_to_addr(bank->priv, gpio, &addr); + spin_lock_irqsave(&bank->priv->lock, flags); + /* Disable interrupt while changing type */ ena = sgpio_readl(bank->priv, REG_INT_ENABLE, addr.bit); sgpio_writel(bank->priv, ena & ~BIT(addr.port), REG_INT_ENABLE, addr.bit); @@ -595,6 +604,8 @@ static void microchip_sgpio_irq_settype(struct irq_data *data, /* Possibly re-enable interrupts */ sgpio_writel(bank->priv, ena, REG_INT_ENABLE, addr.bit); + + spin_unlock_irqrestore(&bank->priv->lock, flags); } static void microchip_sgpio_irq_setreg(struct irq_data *data, @@ -605,13 +616,16 @@ static void microchip_sgpio_irq_setreg(struct irq_data *data, struct sgpio_bank *bank = gpiochip_get_data(chip); unsigned int gpio = irqd_to_hwirq(data); struct sgpio_port_addr addr; + unsigned long flags; sgpio_pin_to_addr(bank->priv, gpio, &addr); + spin_lock_irqsave(&bank->priv->lock, flags); if (clear) sgpio_clrsetbits(bank->priv, reg, addr.bit, BIT(addr.port), 0); else sgpio_clrsetbits(bank->priv, reg, addr.bit, 0, BIT(addr.port)); + spin_unlock_irqrestore(&bank->priv->lock, flags); } static void microchip_sgpio_irq_mask(struct irq_data *data) @@ -833,6 +847,7 @@ static int microchip_sgpio_probe(struct platform_device *pdev) return -ENOMEM; priv->dev = dev; + spin_lock_init(&priv->lock); reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch"); if (IS_ERR(reset)) diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index fc969208d904c..370459243007b 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -1750,8 +1750,8 @@ static int ocelot_gpiochip_register(struct platform_device *pdev, gc->base = -1; gc->label = "ocelot-gpio"; - irq = irq_of_parse_and_map(gc->of_node, 0); - if (irq) { + irq = platform_get_irq_optional(pdev, 0); + if (irq > 0) { girq = &gc->irq; girq->chip = &ocelot_irqchip; girq->parent_handler = ocelot_irq_handler; @@ -1788,9 +1788,10 @@ static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev) .val_bits = 32, .reg_stride = 4, .max_register = 32, + .name = "pincfg", }; - base = devm_platform_ioremap_resource(pdev, 0); + base = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(base)) { dev_dbg(&pdev->dev, "Failed to ioremap config registers (no extended pinconf)\n"); return NULL; diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index d8dd8415fa81b..a1b598b86aa9f 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -2693,6 +2693,7 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev) node = of_parse_phandle(np, "rockchip,grf", 0); if (node) { info->regmap_base = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(info->regmap_base)) return PTR_ERR(info->regmap_base); } else { @@ -2725,6 +2726,7 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev) node = of_parse_phandle(np, "rockchip,pmu", 0); if (node) { info->regmap_pmu = syscon_node_to_regmap(node); + of_node_put(node); if (IS_ERR(info->regmap_pmu)) return PTR_ERR(info->regmap_pmu); } diff --git a/drivers/pinctrl/renesas/core.c b/drivers/pinctrl/renesas/core.c index 0d4ea2e22a535..12d41ac017b53 100644 --- a/drivers/pinctrl/renesas/core.c +++ b/drivers/pinctrl/renesas/core.c @@ -741,7 +741,7 @@ static int sh_pfc_suspend_init(struct sh_pfc *pfc) { return 0; } #ifdef DEBUG #define SH_PFC_MAX_REGS 
300 -#define SH_PFC_MAX_ENUMS 3000 +#define SH_PFC_MAX_ENUMS 5000 static unsigned int sh_pfc_errors __initdata; static unsigned int sh_pfc_warnings __initdata; @@ -865,7 +865,8 @@ static void __init sh_pfc_check_cfg_reg(const char *drvname, GENMASK(cfg_reg->reg_width - 1, 0)); if (cfg_reg->field_width) { - n = cfg_reg->reg_width / cfg_reg->field_width; + fw = cfg_reg->field_width; + n = (cfg_reg->reg_width / fw) << fw; /* Skip field checks (done at build time) */ goto check_enum_ids; } diff --git a/drivers/pinctrl/renesas/pfc-r8a77470.c b/drivers/pinctrl/renesas/pfc-r8a77470.c index e6e5487691c16..cf7153d06a953 100644 --- a/drivers/pinctrl/renesas/pfc-r8a77470.c +++ b/drivers/pinctrl/renesas/pfc-r8a77470.c @@ -2140,7 +2140,7 @@ static const unsigned int vin0_clk_mux[] = { VI0_CLK_MARK, }; /* - VIN1 ------------------------------------------------------------------- */ -static const union vin_data vin1_data_pins = { +static const union vin_data12 vin1_data_pins = { .data12 = { RCAR_GP_PIN(3, 1), RCAR_GP_PIN(3, 2), RCAR_GP_PIN(3, 3), RCAR_GP_PIN(3, 4), @@ -2150,7 +2150,7 @@ static const union vin_data vin1_data_pins = { RCAR_GP_PIN(3, 15), RCAR_GP_PIN(3, 16), }, }; -static const union vin_data vin1_data_mux = { +static const union vin_data12 vin1_data_mux = { .data12 = { VI1_DATA0_MARK, VI1_DATA1_MARK, VI1_DATA2_MARK, VI1_DATA3_MARK, diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c index 2e490e7696f47..4102ce955bd7f 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c @@ -585,13 +585,11 @@ static const struct samsung_pin_ctrl exynos850_pin_ctrl[] __initconst = { /* pin-controller instance 0 ALIVE data */ .pin_banks = exynos850_pin_banks0, .nr_banks = ARRAY_SIZE(exynos850_pin_banks0), - .eint_gpio_init = exynos_eint_gpio_init, .eint_wkup_init = exynos_eint_wkup_init, }, { /* pin-controller instance 1 CMGP data */ .pin_banks = exynos850_pin_banks1, .nr_banks = ARRAY_SIZE(exynos850_pin_banks1), - .eint_gpio_init = exynos_eint_gpio_init, .eint_wkup_init = exynos_eint_wkup_init, }, { /* pin-controller instance 2 AUD data */ diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index 0f6e9305fec58..c4175fea7d741 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -1002,6 +1002,16 @@ samsung_pinctrl_get_soc_data_for_of_alias(struct platform_device *pdev) return &(of_data->ctrl[id]); } +static void samsung_banks_of_node_put(struct samsung_pinctrl_drv_data *d) +{ + struct samsung_pin_bank *bank; + unsigned int i; + + bank = d->pin_banks; + for (i = 0; i < d->nr_banks; ++i, ++bank) + of_node_put(bank->of_node); +} + /* retrieve the soc specific data */ static const struct samsung_pin_ctrl * samsung_pinctrl_get_soc_data(struct samsung_pinctrl_drv_data *d, @@ -1117,19 +1127,19 @@ static int samsung_pinctrl_probe(struct platform_device *pdev) if (ctrl->retention_data) { drvdata->retention_ctrl = ctrl->retention_data->init(drvdata, ctrl->retention_data); - if (IS_ERR(drvdata->retention_ctrl)) - return PTR_ERR(drvdata->retention_ctrl); + if (IS_ERR(drvdata->retention_ctrl)) { + ret = PTR_ERR(drvdata->retention_ctrl); + goto err_put_banks; + } } ret = samsung_pinctrl_register(pdev, drvdata); if (ret) - return ret; + goto err_put_banks; ret = samsung_gpiolib_register(pdev, drvdata); - if (ret) { - samsung_pinctrl_unregister(pdev, drvdata); - return ret; - } + if (ret) + goto err_unregister; if 
(ctrl->eint_gpio_init) ctrl->eint_gpio_init(drvdata); @@ -1139,6 +1149,12 @@ static int samsung_pinctrl_probe(struct platform_device *pdev) platform_set_drvdata(pdev, drvdata); return 0; + +err_unregister: + samsung_pinctrl_unregister(pdev, drvdata); +err_put_banks: + samsung_banks_of_node_put(drvdata); + return ret; } /* diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index f901d2e43166c..88cbc434c06b2 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -2,6 +2,7 @@ # tell define_trace.h where to find the cros ec trace header CFLAGS_cros_ec_trace.o:= -I$(src) +CFLAGS_cros_ec_sensorhub_ring.o:= -I$(src) obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o @@ -20,7 +21,7 @@ obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_chardev.o obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o obj-$(CONFIG_CROS_EC_VBC) += cros_ec_vbc.o obj-$(CONFIG_CROS_EC_DEBUGFS) += cros_ec_debugfs.o -cros-ec-sensorhub-objs := cros_ec_sensorhub.o cros_ec_sensorhub_ring.o cros_ec_trace.o +cros-ec-sensorhub-objs := cros_ec_sensorhub.o cros_ec_sensorhub_ring.o obj-$(CONFIG_CROS_EC_SENSORHUB) += cros-ec-sensorhub.o obj-$(CONFIG_CROS_EC_SYSFS) += cros_ec_sysfs.o obj-$(CONFIG_CROS_USBPD_LOGGER) += cros_usbpd_logger.o diff --git a/drivers/platform/chrome/cros_ec_sensorhub_ring.c b/drivers/platform/chrome/cros_ec_sensorhub_ring.c index 98e37080f7609..71948dade0e2a 100644 --- a/drivers/platform/chrome/cros_ec_sensorhub_ring.c +++ b/drivers/platform/chrome/cros_ec_sensorhub_ring.c @@ -17,7 +17,8 @@ #include #include -#include "cros_ec_trace.h" +#define CREATE_TRACE_POINTS +#include "cros_ec_sensorhub_trace.h" /* Precision of fixed point for the m values from the filter */ #define M_PRECISION BIT(23) diff --git a/drivers/platform/chrome/cros_ec_sensorhub_trace.h b/drivers/platform/chrome/cros_ec_sensorhub_trace.h new file mode 100644 index 0000000000000..57d9b47859692 --- /dev/null +++ b/drivers/platform/chrome/cros_ec_sensorhub_trace.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Trace events for the ChromeOS Sensorhub kernel module + * + * Copyright 2021 Google LLC. 
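+ *
+ * Like other kernel trace headers, this file is designed to be
+ * included more than once (see TRACE_HEADER_MULTI_READ below);
+ * exactly one translation unit, cros_ec_sensorhub_ring.c above,
+ * defines CREATE_TRACE_POINTS before including it so the event
+ * bodies are instantiated.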
+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cros_ec + +#if !defined(_CROS_EC_SENSORHUB_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _CROS_EC_SENSORHUB_TRACE_H_ + +#include +#include + +#include + +TRACE_EVENT(cros_ec_sensorhub_timestamp, + TP_PROTO(u32 ec_sample_timestamp, u32 ec_fifo_timestamp, s64 fifo_timestamp, + s64 current_timestamp, s64 current_time), + TP_ARGS(ec_sample_timestamp, ec_fifo_timestamp, fifo_timestamp, current_timestamp, + current_time), + TP_STRUCT__entry( + __field(u32, ec_sample_timestamp) + __field(u32, ec_fifo_timestamp) + __field(s64, fifo_timestamp) + __field(s64, current_timestamp) + __field(s64, current_time) + __field(s64, delta) + ), + TP_fast_assign( + __entry->ec_sample_timestamp = ec_sample_timestamp; + __entry->ec_fifo_timestamp = ec_fifo_timestamp; + __entry->fifo_timestamp = fifo_timestamp; + __entry->current_timestamp = current_timestamp; + __entry->current_time = current_time; + __entry->delta = current_timestamp - current_time; + ), + TP_printk("ec_ts: %9u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", + __entry->ec_sample_timestamp, + __entry->ec_fifo_timestamp, + __entry->fifo_timestamp, + __entry->current_timestamp, + __entry->current_time, + __entry->delta + ) +); + +TRACE_EVENT(cros_ec_sensorhub_data, + TP_PROTO(u32 ec_sensor_num, u32 ec_fifo_timestamp, s64 fifo_timestamp, + s64 current_timestamp, s64 current_time), + TP_ARGS(ec_sensor_num, ec_fifo_timestamp, fifo_timestamp, current_timestamp, current_time), + TP_STRUCT__entry( + __field(u32, ec_sensor_num) + __field(u32, ec_fifo_timestamp) + __field(s64, fifo_timestamp) + __field(s64, current_timestamp) + __field(s64, current_time) + __field(s64, delta) + ), + TP_fast_assign( + __entry->ec_sensor_num = ec_sensor_num; + __entry->ec_fifo_timestamp = ec_fifo_timestamp; + __entry->fifo_timestamp = fifo_timestamp; + __entry->current_timestamp = current_timestamp; + __entry->current_time = current_time; + __entry->delta = current_timestamp - current_time; + ), + TP_printk("ec_num: %4u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", + __entry->ec_sensor_num, + __entry->ec_fifo_timestamp, + __entry->fifo_timestamp, + __entry->current_timestamp, + __entry->current_time, + __entry->delta + ) +); + +TRACE_EVENT(cros_ec_sensorhub_filter, + TP_PROTO(struct cros_ec_sensors_ts_filter_state *state, s64 dx, s64 dy), + TP_ARGS(state, dx, dy), + TP_STRUCT__entry( + __field(s64, dx) + __field(s64, dy) + __field(s64, median_m) + __field(s64, median_error) + __field(s64, history_len) + __field(s64, x) + __field(s64, y) + ), + TP_fast_assign( + __entry->dx = dx; + __entry->dy = dy; + __entry->median_m = state->median_m; + __entry->median_error = state->median_error; + __entry->history_len = state->history_len; + __entry->x = state->x_offset; + __entry->y = state->y_offset; + ), + TP_printk("dx: %12lld. dy: %12lld median_m: %12lld median_error: %12lld len: %lld x: %12lld y: %12lld", + __entry->dx, + __entry->dy, + __entry->median_m, + __entry->median_error, + __entry->history_len, + __entry->x, + __entry->y + ) +); + + +#endif /* _CROS_EC_SENSORHUB_TRACE_H_ */ + +/* this part must be outside header guard */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE cros_ec_sensorhub_trace
+
+#include
diff --git a/drivers/platform/chrome/cros_ec_trace.h b/drivers/platform/chrome/cros_ec_trace.h
index 7e7cfc98657a4..9bb5cd2c98b8b 100644
--- a/drivers/platform/chrome/cros_ec_trace.h
+++ b/drivers/platform/chrome/cros_ec_trace.h
@@ -15,7 +15,6 @@
 #include
 #include
 #include
-#include
 #include
@@ -71,100 +70,6 @@ TRACE_EVENT(cros_ec_request_done,
 __entry->retval)
 );
-TRACE_EVENT(cros_ec_sensorhub_timestamp,
- TP_PROTO(u32 ec_sample_timestamp, u32 ec_fifo_timestamp, s64 fifo_timestamp,
- s64 current_timestamp, s64 current_time),
- TP_ARGS(ec_sample_timestamp, ec_fifo_timestamp, fifo_timestamp, current_timestamp,
- current_time),
- TP_STRUCT__entry(
- __field(u32, ec_sample_timestamp)
- __field(u32, ec_fifo_timestamp)
- __field(s64, fifo_timestamp)
- __field(s64, current_timestamp)
- __field(s64, current_time)
- __field(s64, delta)
- ),
- TP_fast_assign(
- __entry->ec_sample_timestamp = ec_sample_timestamp;
- __entry->ec_fifo_timestamp = ec_fifo_timestamp;
- __entry->fifo_timestamp = fifo_timestamp;
- __entry->current_timestamp = current_timestamp;
- __entry->current_time = current_time;
- __entry->delta = current_timestamp - current_time;
- ),
- TP_printk("ec_ts: %9u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld",
- __entry->ec_sample_timestamp,
- __entry->ec_fifo_timestamp,
- __entry->fifo_timestamp,
- __entry->current_timestamp,
- __entry->current_time,
- __entry->delta
- )
-);
-
-TRACE_EVENT(cros_ec_sensorhub_data,
- TP_PROTO(u32 ec_sensor_num, u32 ec_fifo_timestamp, s64 fifo_timestamp,
- s64 current_timestamp, s64 current_time),
- TP_ARGS(ec_sensor_num, ec_fifo_timestamp, fifo_timestamp, current_timestamp, current_time),
- TP_STRUCT__entry(
- __field(u32, ec_sensor_num)
- __field(u32, ec_fifo_timestamp)
- __field(s64, fifo_timestamp)
- __field(s64, current_timestamp)
- __field(s64, current_time)
- __field(s64, delta)
- ),
- TP_fast_assign(
- __entry->ec_sensor_num = ec_sensor_num;
- __entry->ec_fifo_timestamp = ec_fifo_timestamp;
- __entry->fifo_timestamp = fifo_timestamp;
- __entry->current_timestamp = current_timestamp;
- __entry->current_time = current_time;
- __entry->delta = current_timestamp - current_time;
- ),
- TP_printk("ec_num: %4u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld",
- __entry->ec_sensor_num,
- __entry->ec_fifo_timestamp,
- __entry->fifo_timestamp,
- __entry->current_timestamp,
- __entry->current_time,
- __entry->delta
- )
-);
-
-TRACE_EVENT(cros_ec_sensorhub_filter,
- TP_PROTO(struct cros_ec_sensors_ts_filter_state *state, s64 dx, s64 dy),
- TP_ARGS(state, dx, dy),
- TP_STRUCT__entry(
- __field(s64, dx)
- __field(s64, dy)
- __field(s64, median_m)
- __field(s64, median_error)
- __field(s64, history_len)
- __field(s64, x)
- __field(s64, y)
- ),
- TP_fast_assign(
- __entry->dx = dx;
- __entry->dy = dy;
- __entry->median_m = state->median_m;
- __entry->median_error = state->median_error;
- __entry->history_len = state->history_len;
- __entry->x = state->x_offset;
- __entry->y = state->y_offset;
- ),
- TP_printk("dx: %12lld. dy: %12lld median_m: %12lld median_error: %12lld len: %lld x: %12lld y: %12lld",
- __entry->dx,
- __entry->dy,
- __entry->median_m,
- __entry->median_error,
- __entry->history_len,
- __entry->x,
- __entry->y
- )
-);
-
-
 #endif /* _CROS_EC_TRACE_H_ */
 /* this part must be outside header guard */
diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c
index 5de0bfb0bc4d9..952c1756f59ee 100644
--- a/drivers/platform/chrome/cros_ec_typec.c
+++ b/drivers/platform/chrome/cros_ec_typec.c
@@ -1075,7 +1075,13 @@ static int cros_typec_probe(struct platform_device *pdev)
 return -ENOMEM;
 typec->dev = dev;
+
 typec->ec = dev_get_drvdata(pdev->dev.parent);
+ if (!typec->ec) {
+ dev_err(dev, "couldn't find parent EC device\n");
+ return -ENODEV;
+ }
+
 platform_set_drvdata(pdev, typec);
 ret = cros_typec_get_cmd_version(typec);
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 2104a2621e507..adab31b52f2af 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -2059,7 +2059,7 @@ static int fan_boost_mode_check_present(struct asus_wmi *asus)
 err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_FAN_BOOST_MODE,
 &result);
 if (err) {
- if (err == -ENODEV)
+ if (err == -ENODEV || err == -ENODATA)
 return 0;
 else
 return err;
diff --git a/drivers/platform/x86/huawei-wmi.c b/drivers/platform/x86/huawei-wmi.c
index a2d846c4a7eef..eac3e6b4ea113 100644
--- a/drivers/platform/x86/huawei-wmi.c
+++ b/drivers/platform/x86/huawei-wmi.c
@@ -470,10 +470,17 @@ static DEVICE_ATTR_RW(charge_control_thresholds);
 static int huawei_wmi_battery_add(struct power_supply *battery)
 {
- device_create_file(&battery->dev, &dev_attr_charge_control_start_threshold);
- device_create_file(&battery->dev, &dev_attr_charge_control_end_threshold);
+ int err = 0;
- return 0;
+ err = device_create_file(&battery->dev, &dev_attr_charge_control_start_threshold);
+ if (err)
+ return err;
+
+ err = device_create_file(&battery->dev, &dev_attr_charge_control_end_threshold);
+ if (err)
+ device_remove_file(&battery->dev, &dev_attr_charge_control_start_threshold);
+
+ return err;
 }
 static int huawei_wmi_battery_remove(struct power_supply *battery)
diff --git a/drivers/power/reset/gemini-poweroff.c b/drivers/power/reset/gemini-poweroff.c
index 90e35c07240ae..b7f7a8225f22e 100644
--- a/drivers/power/reset/gemini-poweroff.c
+++ b/drivers/power/reset/gemini-poweroff.c
@@ -107,8 +107,8 @@ static int gemini_poweroff_probe(struct platform_device *pdev)
 return PTR_ERR(gpw->base);
 irq = platform_get_irq(pdev, 0);
- if (!irq)
- return -EINVAL;
+ if (irq < 0)
+ return irq;
 gpw->dev = dev;
diff --git a/drivers/power/supply/ab8500_bmdata.c b/drivers/power/supply/ab8500_bmdata.c
index 7ae95f5375801..9a8334a65de1b 100644
--- a/drivers/power/supply/ab8500_bmdata.c
+++ b/drivers/power/supply/ab8500_bmdata.c
@@ -188,13 +188,11 @@ int ab8500_bm_of_probe(struct power_supply *psy,
 * fall back to safe defaults.
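+ * The termination voltage also serves as the overvoltage cutoff:
+ * ab8500_chargalg compares the battery voltage against
+ * voltage_max_design_uv when deciding end-of-charge.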
 */
 if ((bi->voltage_min_design_uv < 0) ||
- (bi->voltage_max_design_uv < 0) ||
- (bi->overvoltage_limit_uv < 0)) {
+ (bi->voltage_max_design_uv < 0)) {
 /* Nominal voltage is 3.7V for unknown batteries */
 bi->voltage_min_design_uv = 3700000;
- bi->voltage_max_design_uv = 3700000;
- /* Termination voltage (overcharge limit) 4.05V */
- bi->overvoltage_limit_uv = 4050000;
+ /* Termination voltage 4.05V */
+ bi->voltage_max_design_uv = 4050000;
 }
 if (bi->constant_charge_current_max_ua < 0)
diff --git a/drivers/power/supply/ab8500_chargalg.c b/drivers/power/supply/ab8500_chargalg.c
index c4a2fe07126c3..da490e090ce48 100644
--- a/drivers/power/supply/ab8500_chargalg.c
+++ b/drivers/power/supply/ab8500_chargalg.c
@@ -802,7 +802,7 @@ static void ab8500_chargalg_end_of_charge(struct ab8500_chargalg *di)
 if (di->charge_status == POWER_SUPPLY_STATUS_CHARGING &&
 di->charge_state == STATE_NORMAL &&
 !di->maintenance_chg && (di->batt_data.volt_uv >=
- di->bm->bi->overvoltage_limit_uv ||
+ di->bm->bi->voltage_max_design_uv ||
 di->events.usb_cv_active || di->events.ac_cv_active) &&
 di->batt_data.avg_curr_ua < di->bm->bi->charge_term_current_ua &&
@@ -2020,11 +2020,11 @@ static int ab8500_chargalg_probe(struct platform_device *pdev)
 psy_cfg.drv_data = di;
 /* Initilialize safety timer */
- hrtimer_init(&di->safety_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ hrtimer_init(&di->safety_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 di->safety_timer.function = ab8500_chargalg_safety_timer_expired;
 /* Initilialize maintenance timer */
- hrtimer_init(&di->maintenance_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ hrtimer_init(&di->maintenance_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 di->maintenance_timer.function =
 ab8500_chargalg_maintenance_timer_expired;
diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c
index b0919a6a65878..09a4cbd69676a 100644
--- a/drivers/power/supply/ab8500_fg.c
+++ b/drivers/power/supply/ab8500_fg.c
@@ -2263,7 +2263,13 @@ static int ab8500_fg_init_hw_registers(struct ab8500_fg *di)
 {
 int ret;
- /* Set VBAT OVV threshold */
+ /*
+ * Set VBAT OVV (overvoltage) threshold to 4.75V (typ) this is what
+ * the hardware supports, nothing else can be configured in hardware.
+ * See this as an "outer limit" where the charger will certainly
+ * shut down. Other (lower) overvoltage levels need to be implemented
+ * in software.
+ */
 ret = abx500_mask_and_set_register_interruptible(di->dev,
 AB8500_CHARGER,
 AB8500_BATT_OVV,
@@ -2521,8 +2527,10 @@ static int ab8500_fg_sysfs_init(struct ab8500_fg *di)
 ret = kobject_init_and_add(&di->fg_kobject,
 &ab8500_fg_ktype,
 NULL, "battery");
- if (ret < 0)
+ if (ret < 0) {
+ kobject_put(&di->fg_kobject);
 dev_err(di->dev, "failed to create sysfs entry\n");
+ }
 return ret;
 }
diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c
index 06c34b09349ca..8ad1b3b02490c 100644
--- a/drivers/power/supply/bq24190_charger.c
+++ b/drivers/power/supply/bq24190_charger.c
@@ -39,6 +39,7 @@
 #define BQ24190_REG_POC_CHG_CONFIG_DISABLE 0x0
 #define BQ24190_REG_POC_CHG_CONFIG_CHARGE 0x1
 #define BQ24190_REG_POC_CHG_CONFIG_OTG 0x2
+#define BQ24190_REG_POC_CHG_CONFIG_OTG_ALT 0x3
 #define BQ24190_REG_POC_SYS_MIN_MASK (BIT(3) | BIT(2) | BIT(1))
 #define BQ24190_REG_POC_SYS_MIN_SHIFT 1
 #define BQ24190_REG_POC_SYS_MIN_MIN 3000
@@ -550,7 +551,11 @@ static int bq24190_vbus_is_enabled(struct regulator_dev *dev)
 pm_runtime_mark_last_busy(bdi->dev);
 pm_runtime_put_autosuspend(bdi->dev);
- return ret ? ret : val == BQ24190_REG_POC_CHG_CONFIG_OTG;
+ if (ret)
+ return ret;
+
+ return (val == BQ24190_REG_POC_CHG_CONFIG_OTG ||
+ val == BQ24190_REG_POC_CHG_CONFIG_OTG_ALT);
 }
 static const struct regulator_ops bq24190_vbus_ops = {
diff --git a/drivers/power/supply/sbs-charger.c b/drivers/power/supply/sbs-charger.c
index 6fa65d118ec12..b08f7d0c41815 100644
--- a/drivers/power/supply/sbs-charger.c
+++ b/drivers/power/supply/sbs-charger.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #define SBS_CHARGER_REG_SPEC_INFO 0x11
 #define SBS_CHARGER_REG_STATUS 0x13
@@ -209,7 +210,12 @@ static int sbs_probe(struct i2c_client *client,
 if (ret)
 return dev_err_probe(&client->dev, ret, "Failed to request irq\n");
 } else {
- INIT_DELAYED_WORK(&chip->work, sbs_delayed_work);
+ ret = devm_delayed_work_autocancel(&client->dev, &chip->work,
+ sbs_delayed_work);
+ if (ret)
+ return dev_err_probe(&client->dev, ret,
+ "Failed to init work for polling\n");
+
 schedule_delayed_work(&chip->work,
 msecs_to_jiffies(SBS_CHARGER_POLL_TIME));
 }
@@ -220,15 +226,6 @@ static int sbs_probe(struct i2c_client *client,
 return 0;
 }
-static int sbs_remove(struct i2c_client *client)
-{
- struct sbs_info *chip = i2c_get_clientdata(client);
-
- cancel_delayed_work_sync(&chip->work);
-
- return 0;
-}
-
 #ifdef CONFIG_OF
 static const struct of_device_id sbs_dt_ids[] = {
 { .compatible = "sbs,sbs-charger" },
@@ -245,7 +242,6 @@ MODULE_DEVICE_TABLE(i2c, sbs_id);
 static struct i2c_driver sbs_driver = {
 .probe = sbs_probe,
- .remove = sbs_remove,
 .id_table = sbs_id,
 .driver = {
 .name = "sbs-charger",
diff --git a/drivers/power/supply/wm8350_power.c b/drivers/power/supply/wm8350_power.c
index e05cee457471b..908cfd45d2624 100644
--- a/drivers/power/supply/wm8350_power.c
+++ b/drivers/power/supply/wm8350_power.c
@@ -408,44 +408,112 @@ static const struct power_supply_desc wm8350_usb_desc = {
 * Initialisation
 *********************************************************************/
-static void wm8350_init_charger(struct wm8350 *wm8350)
+static int wm8350_init_charger(struct wm8350 *wm8350)
 {
+ int ret;
+
 /* register our interest in charger events */
- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT,
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT,
 wm8350_charger_handler, 0, "Battery hot", wm8350);
- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD,
+ if (ret)
+ goto err;
+
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD,
 wm8350_charger_handler, 0, "Battery cold", wm8350);
- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL,
+ if (ret)
+ goto free_chg_bat_hot;
+
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL,
 wm8350_charger_handler, 0, "Battery fail", wm8350);
- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_TO,
+ if (ret)
+ goto free_chg_bat_cold;
+
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_TO,
 wm8350_charger_handler, 0,
 "Charger timeout", wm8350);
- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_END,
+ if (ret)
+ goto free_chg_bat_fail;
+
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_END,
 wm8350_charger_handler, 0, "Charge end", wm8350);
- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_START,
+ if (ret)
+ goto free_chg_to;
+
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_START,
 wm8350_charger_handler, 0, "Charge start", wm8350);
- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY,
+ if (ret)
+ goto free_chg_end;
+
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY,
 wm8350_charger_handler, 0,
 "Fast charge ready", wm8350);
- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9,
+ if (ret)
+ goto free_chg_start;
+
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9,
 wm8350_charger_handler, 0,
 "Battery <3.9V", wm8350);
- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1,
+ if (ret)
+ goto free_chg_fast_rdy;
+
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1,
 wm8350_charger_handler, 0,
 "Battery <3.1V", wm8350);
- wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85,
+ if (ret)
+ goto free_chg_vbatt_lt_3p9;
+
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85,
 wm8350_charger_handler, 0,
 "Battery <2.85V", wm8350);
+ if (ret)
+ goto free_chg_vbatt_lt_3p1;
 /* and supply change events */
- wm8350_register_irq(wm8350, WM8350_IRQ_EXT_USB_FB,
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_USB_FB,
 wm8350_charger_handler, 0, "USB", wm8350);
- wm8350_register_irq(wm8350, WM8350_IRQ_EXT_WALL_FB,
+ if (ret)
+ goto free_chg_vbatt_lt_2p85;
+
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_WALL_FB,
 wm8350_charger_handler, 0, "Wall", wm8350);
- wm8350_register_irq(wm8350, WM8350_IRQ_EXT_BAT_FB,
+ if (ret)
+ goto free_ext_usb_fb;
+
+ ret = wm8350_register_irq(wm8350, WM8350_IRQ_EXT_BAT_FB,
 wm8350_charger_handler, 0, "Battery", wm8350);
+ if (ret)
+ goto free_ext_wall_fb;
+
+ return 0;
+
+free_ext_wall_fb:
+ wm8350_free_irq(wm8350, WM8350_IRQ_EXT_WALL_FB, wm8350);
+free_ext_usb_fb:
+ wm8350_free_irq(wm8350, WM8350_IRQ_EXT_USB_FB, wm8350);
+free_chg_vbatt_lt_2p85:
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350);
+free_chg_vbatt_lt_3p1:
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350);
+free_chg_vbatt_lt_3p9:
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350);
+free_chg_fast_rdy:
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, wm8350);
+free_chg_start:
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START, wm8350);
+free_chg_end:
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END, wm8350);
+free_chg_to:
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350);
+free_chg_bat_fail:
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL, wm8350);
+free_chg_bat_cold:
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD, wm8350);
+free_chg_bat_hot:
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT, wm8350);
+err:
+ return ret;
 }
 static void free_charger_irq(struct wm8350 *wm8350)
@@ -456,6 +524,7 @@ static void free_charger_irq(struct wm8350 *wm8350)
 wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350);
 wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END, wm8350);
 wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START, wm8350);
+ wm8350_free_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY, wm8350);
 wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350);
 wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350);
 wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350);
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index b740866b228d9..1e8cac699646c 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -150,10 +150,17 @@ static int update_pd_power_uw(struct dtpm *dtpm)
 static void pd_release(struct dtpm *dtpm)
 {
 struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+ struct cpufreq_policy *policy;
 if (freq_qos_request_active(&dtpm_cpu->qos_req))
 freq_qos_remove_request(&dtpm_cpu->qos_req);
+ policy = cpufreq_cpu_get(dtpm_cpu->cpu);
+ if (policy) {
+ for_each_cpu(dtpm_cpu->cpu, policy->related_cpus)
+ per_cpu(dtpm_per_cpu, dtpm_cpu->cpu) = NULL;
+ }
+
 kfree(dtpm_cpu);
 }
diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c
index 35799e6401c99..2f4b11b4dfcd9 100644
--- a/drivers/pps/clients/pps-gpio.c
+++ b/drivers/pps/clients/pps-gpio.c
@@ -169,7 +169,7 @@ static int pps_gpio_probe(struct platform_device *pdev)
 /* GPIO setup */
 ret = pps_gpio_setup(dev);
 if (ret)
- return -EINVAL;
+ return ret;
 /* IRQ setup */
 ret = gpiod_to_irq(data->gpio_pin);
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 0e4bc8b9329dd..b6f2cfd15dd2d 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -317,11 +317,18 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 }
 EXPORT_SYMBOL(ptp_clock_register);
+static int unregister_vclock(struct device *dev, void *data)
+{
+ struct ptp_clock *ptp = dev_get_drvdata(dev);
+
+ ptp_vclock_unregister(info_to_vclock(ptp->info));
+ return 0;
+}
+
 int ptp_clock_unregister(struct ptp_clock *ptp)
 {
 if (ptp_vclock_in_use(ptp)) {
- pr_err("ptp: virtual clock in use\n");
- return -EBUSY;
+ device_for_each_child(&ptp->dev, NULL, unregister_vclock);
 }
 ptp->defunct = 1;
diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c
index 8e461f3baa05a..8cc8ae16553cf 100644
--- a/drivers/pwm/pwm-lpc18xx-sct.c
+++ b/drivers/pwm/pwm-lpc18xx-sct.c
@@ -395,12 +395,6 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
 lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_LIMIT,
 BIT(lpc18xx_pwm->period_event));
- ret = pwmchip_add(&lpc18xx_pwm->chip);
- if (ret < 0) {
- dev_err(&pdev->dev, "pwmchip_add failed: %d\n", ret);
- goto disable_pwmclk;
- }
-
 for (i = 0; i < lpc18xx_pwm->chip.npwm; i++) {
 struct lpc18xx_pwm_data *data;
@@ -410,14 +404,12 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
 GFP_KERNEL);
 if (!data) {
 ret = -ENOMEM;
- goto remove_pwmchip;
+ goto disable_pwmclk;
 }
 pwm_set_chip_data(pwm, data);
 }
- platform_set_drvdata(pdev, lpc18xx_pwm);
-
 val = lpc18xx_pwm_readl(lpc18xx_pwm, LPC18XX_PWM_CTRL);
 val &= ~LPC18XX_PWM_BIDIR;
 val &= ~LPC18XX_PWM_CTRL_HALT;
@@ -425,10 +417,16 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
 val |= LPC18XX_PWM_PRE(0);
 lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL, val);
+ ret = pwmchip_add(&lpc18xx_pwm->chip);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "pwmchip_add failed: %d\n", ret);
+ goto disable_pwmclk;
+ }
+
+ platform_set_drvdata(pdev, lpc18xx_pwm);
+
 return 0;
-remove_pwmchip:
- pwmchip_remove(&lpc18xx_pwm->chip);
 disable_pwmclk:
 clk_disable_unprepare(lpc18xx_pwm->pwm_clk);
 return ret;
diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index 9fc666107a06c..8490aa8eecb1a 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -1317,8 +1317,10 @@ static int rpm_reg_probe(struct platform_device *pdev)
 for_each_available_child_of_node(dev->of_node, node) {
 vreg = devm_kzalloc(&pdev->dev, sizeof(*vreg), GFP_KERNEL);
- if (!vreg)
+ if (!vreg) {
+ of_node_put(node);
 return -ENOMEM;
+ }
 ret = rpm_regulator_init_vreg(vreg, dev, node, rpm, vreg_data);
diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index ee46bfbf5eee7..991b4730d7687 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -37,11 +37,24 @@ static const struct regmap_config attiny_regmap_config = {
 static int attiny_lcd_power_enable(struct regulator_dev *rdev)
 {
 unsigned int data;
+ int ret, i;
 regmap_write(rdev->regmap, REG_POWERON, 1);
+ msleep(80);
+
 /* Wait for nPWRDWN to go low to indicate poweron is done. */
- regmap_read_poll_timeout(rdev->regmap, REG_PORTB, data,
- data & BIT(0), 10, 1000000);
+ for (i = 0; i < 20; i++) {
+ ret = regmap_read(rdev->regmap, REG_PORTB, &data);
+ if (!ret) {
+ if (data & BIT(0))
+ break;
+ }
+ usleep_range(10000, 12000);
+ }
+ usleep_range(10000, 12000);
+
+ if (ret)
+ pr_err("%s: regmap_read_poll_timeout failed %d\n", __func__, ret);
 /* Default to the same orientation as the closed source
 * firmware used for the panel. Runtime rotation
@@ -57,23 +70,34 @@ static int attiny_lcd_power_disable(struct regulator_dev *rdev)
 {
 regmap_write(rdev->regmap, REG_PWM, 0);
 regmap_write(rdev->regmap, REG_POWERON, 0);
- udelay(1);
+ msleep(30);
 return 0;
 }
 static int attiny_lcd_power_is_enabled(struct regulator_dev *rdev)
 {
 unsigned int data;
- int ret;
+ int ret, i;
- ret = regmap_read(rdev->regmap, REG_POWERON, &data);
+ for (i = 0; i < 10; i++) {
+ ret = regmap_read(rdev->regmap, REG_POWERON, &data);
+ if (!ret)
+ break;
+ usleep_range(10000, 12000);
+ }
 if (ret < 0)
 return ret;
 if (!(data & BIT(0)))
 return 0;
- ret = regmap_read(rdev->regmap, REG_PORTB, &data);
+ for (i = 0; i < 10; i++) {
+ ret = regmap_read(rdev->regmap, REG_PORTB, &data);
+ if (!ret)
+ break;
+ usleep_range(10000, 12000);
+ }
+
 if (ret < 0)
 return ret;
@@ -103,20 +127,32 @@ static int attiny_update_status(struct backlight_device *bl)
 {
 struct regmap *regmap = bl_get_data(bl);
 int brightness = bl->props.brightness;
+ int ret, i;
 if (bl->props.power != FB_BLANK_UNBLANK ||
 bl->props.fb_blank != FB_BLANK_UNBLANK)
 brightness = 0;
- return regmap_write(regmap, REG_PWM, brightness);
+ for (i = 0; i < 10; i++) {
+ ret = regmap_write(regmap, REG_PWM, brightness);
+ if (!ret)
+ break;
+ }
+
+ return ret;
 }
 static int attiny_get_brightness(struct backlight_device *bl)
 {
 struct regmap *regmap = bl_get_data(bl);
- int ret, brightness;
+ int ret, brightness, i;
+
+ for (i = 0; i < 10; i++) {
+ ret = regmap_read(regmap, REG_PWM, &brightness);
+ if (!ret)
+ break;
+ }
- ret = regmap_read(regmap, REG_PWM, &brightness);
 if (ret)
 return ret;
@@ -166,7 +202,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c,
 }
 regmap_write(regmap, REG_POWERON, 0);
- mdelay(1);
+ msleep(30);
 config.dev = &i2c->dev;
 config.regmap = regmap;
diff --git a/drivers/remoteproc/qcom_q6v5_adsp.c b/drivers/remoteproc/qcom_q6v5_adsp.c
index 098362e6e233b..7c02bc1322479 100644
--- a/drivers/remoteproc/qcom_q6v5_adsp.c
+++ b/drivers/remoteproc/qcom_q6v5_adsp.c
@@ -408,6 +408,7 @@ static int adsp_alloc_memory_region(struct qcom_adsp *adsp)
 }
 ret = of_address_to_resource(node, 0, &r);
+ of_node_put(node);
 if (ret)
 return ret;
diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c
index 43ea8455546ca..b9ab91540b00d 100644
--- a/drivers/remoteproc/qcom_q6v5_mss.c
+++ b/drivers/remoteproc/qcom_q6v5_mss.c
@@ -1806,18 +1806,20 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
 * reserved memory regions from device's memory-region property.
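+ * Every node reference taken below (of_get_child_by_name() and
+ * of_parse_phandle()) is dropped again with of_node_put() on all paths.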
 */
 child = of_get_child_by_name(qproc->dev->of_node, "mba");
- if (!child)
+ if (!child) {
 node = of_parse_phandle(qproc->dev->of_node,
 "memory-region", 0);
- else
+ } else {
 node = of_parse_phandle(child, "memory-region", 0);
+ of_node_put(child);
+ }
 ret = of_address_to_resource(node, 0, &r);
+ of_node_put(node);
 if (ret) {
 dev_err(qproc->dev, "unable to resolve mba region\n");
 return ret;
 }
- of_node_put(node);
 qproc->mba_phys = r.start;
 qproc->mba_size = resource_size(&r);
@@ -1828,14 +1830,15 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
 } else {
 child = of_get_child_by_name(qproc->dev->of_node, "mpss");
 node = of_parse_phandle(child, "memory-region", 0);
+ of_node_put(child);
 }
 ret = of_address_to_resource(node, 0, &r);
+ of_node_put(node);
 if (ret) {
 dev_err(qproc->dev, "unable to resolve mpss region\n");
 return ret;
 }
- of_node_put(node);
 qproc->mpss_phys = qproc->mpss_reloc = r.start;
 qproc->mpss_size = resource_size(&r);
diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c
index 80bbafee98463..9a223d394087f 100644
--- a/drivers/remoteproc/qcom_wcnss.c
+++ b/drivers/remoteproc/qcom_wcnss.c
@@ -500,6 +500,7 @@ static int wcnss_alloc_memory_region(struct qcom_wcnss *wcnss)
 }
 ret = of_address_to_resource(node, 0, &r);
+ of_node_put(node);
 if (ret)
 return ret;
diff --git a/drivers/remoteproc/remoteproc_debugfs.c b/drivers/remoteproc/remoteproc_debugfs.c
index b5a1e3b697d9f..581930483ef84 100644
--- a/drivers/remoteproc/remoteproc_debugfs.c
+++ b/drivers/remoteproc/remoteproc_debugfs.c
@@ -76,7 +76,7 @@ static ssize_t rproc_coredump_write(struct file *filp,
 int ret, err = 0;
 char buf[20];
- if (count > sizeof(buf))
+ if (count < 1 || count > sizeof(buf))
 return -EINVAL;
 ret = copy_from_user(buf, user_buf, count);
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index d8e8357981537..9edd662c69ace 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -804,9 +804,13 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
 struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue);
 struct rtc_time tm;
 ktime_t now;
+ int err;
+
+ err = __rtc_read_time(rtc, &tm);
+ if (err)
+ return err;
 timer->enabled = 1;
- __rtc_read_time(rtc, &tm);
 now = rtc_tm_to_ktime(tm);
 /* Skip over expired timers */
@@ -820,7 +824,6 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
 trace_rtc_timer_enqueue(timer);
 if (!next || ktime_before(timer->node.expires, next->expires)) {
 struct rtc_wkalrm alarm;
- int err;
 alarm.time = rtc_ktime_to_tm(timer->node.expires);
 alarm.enabled = 1;
diff --git a/drivers/rtc/rtc-gamecube.c b/drivers/rtc/rtc-gamecube.c
index f717b36f4738c..18ca3b38b2d04 100644
--- a/drivers/rtc/rtc-gamecube.c
+++ b/drivers/rtc/rtc-gamecube.c
@@ -235,6 +235,7 @@ static int gamecube_rtc_read_offset_from_sram(struct priv *d)
 }
 ret = of_address_to_resource(np, 0, &res);
+ of_node_put(np);
 if (ret) {
 pr_err("no io memory range found\n");
 return -1;
diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
index ae9f131b43c0c..562f99b664a24 100644
--- a/drivers/rtc/rtc-mc146818-lib.c
+++ b/drivers/rtc/rtc-mc146818-lib.c
@@ -232,8 +232,10 @@ int mc146818_set_time(struct rtc_time *time)
 if (yrs >= 100)
 yrs -= 100;
- if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
- || RTC_ALWAYS_BCD) {
+ spin_lock_irqsave(&rtc_lock, flags);
+ save_control = CMOS_READ(RTC_CONTROL);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
 sec = bin2bcd(sec);
 min = bin2bcd(min);
 hrs = bin2bcd(hrs);
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index e38ee88483855..bad6a5d9c6839 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -350,9 +350,6 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id)
 }
 }
- if (!adev->irq[0])
- clear_bit(RTC_FEATURE_ALARM, ldata->rtc->features);
-
 device_init_wakeup(&adev->dev, true);
 ldata->rtc = devm_rtc_allocate_device(&adev->dev);
 if (IS_ERR(ldata->rtc)) {
@@ -360,6 +357,9 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id)
 goto out;
 }
+ if (!adev->irq[0])
+ clear_bit(RTC_FEATURE_ALARM, ldata->rtc->features);
+
 ldata->rtc->ops = ops;
 ldata->rtc->range_min = vendor->range_min;
 ldata->rtc->range_max = vendor->range_max;
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 40a52feb315da..65047806a5410 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -604,7 +604,7 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc)
 FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no,
 tag, sc, io_req, sg_count, cmd_trace,
- (((u64)CMD_FLAGS(sc) >> 32) | CMD_STATE(sc)));
+ (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
 /* if only we issued IO, will we have the io lock */
 if (io_lock_acquired)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index a01a3a7b706b5..70173389f6ebd 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -530,7 +530,7 @@ MODULE_PARM_DESC(intr_conv, "interrupt converge enable (0-1)");
 /* permit overriding the host protection capabilities mask (EEDP/T10 PI) */
 static int prot_mask;
-module_param(prot_mask, int, 0);
+module_param(prot_mask, int, 0444);
 MODULE_PARM_DESC(prot_mask, " host protection capabilities mask, def=0x0 ");
 static void debugfs_work_handler_v3_hw(struct work_struct *work);
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index a315715b36227..7e0cde710fc3c 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -197,7 +197,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
 task->total_xfer_len = qc->nbytes;
 task->num_scatter = qc->n_elem;
 task->data_dir = qc->dma_dir;
- } else if (qc->tf.protocol == ATA_PROT_NODATA) {
+ } else if (!ata_is_data(qc->tf.protocol)) {
 task->data_dir = DMA_NONE;
 } else {
 for_each_sg(qc->sg, sg, qc->n_elem, si)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 76229b839560a..fb5a3a348dbec 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -5736,14 +5736,13 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc)
 */
 static int
-mpt3sas_check_same_4gb_region(long reply_pool_start_address, u32 pool_sz)
+mpt3sas_check_same_4gb_region(dma_addr_t start_address, u32 pool_sz)
 {
- long reply_pool_end_address;
+ dma_addr_t end_address;
- reply_pool_end_address = reply_pool_start_address + pool_sz;
+ end_address = start_address + pool_sz - 1;
- if (upper_32_bits(reply_pool_start_address) ==
- upper_32_bits(reply_pool_end_address))
+ if (upper_32_bits(start_address) == upper_32_bits(end_address))
 return 1;
 else
 return 0;
@@ -5804,7 +5803,7 @@ _base_allocate_pcie_sgl_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz)
 }
 if (!mpt3sas_check_same_4gb_region(
- (long)ioc->pcie_sg_lookup[i].pcie_sgl, sz)) {
+ ioc->pcie_sg_lookup[i].pcie_sgl_dma, sz)) {
 ioc_err(ioc, "PCIE SGLs are not in same 4G !! pcie sgl (0x%p) dma = (0x%llx)\n",
 ioc->pcie_sg_lookup[i].pcie_sgl,
 (unsigned long long)
@@ -5859,8 +5858,8 @@ _base_allocate_chain_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz)
 GFP_KERNEL, &ctr->chain_buffer_dma);
 if (!ctr->chain_buffer)
 return -EAGAIN;
- if (!mpt3sas_check_same_4gb_region((long)
- ctr->chain_buffer, ioc->chain_segment_sz)) {
+ if (!mpt3sas_check_same_4gb_region(
+ ctr->chain_buffer_dma, ioc->chain_segment_sz)) {
 ioc_err(ioc,
 "Chain buffers are not in same 4G !!! Chain buff (0x%p) dma = (0x%llx)\n",
 ctr->chain_buffer,
@@ -5896,7 +5895,7 @@ _base_allocate_sense_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz)
 GFP_KERNEL, &ioc->sense_dma);
 if (!ioc->sense)
 return -EAGAIN;
- if (!mpt3sas_check_same_4gb_region((long)ioc->sense, sz)) {
+ if (!mpt3sas_check_same_4gb_region(ioc->sense_dma, sz)) {
 dinitprintk(ioc, pr_err(
 "Bad Sense Pool! sense (0x%p) sense_dma = (0x%llx)\n",
 ioc->sense, (unsigned long long) ioc->sense_dma));
@@ -5929,7 +5928,7 @@ _base_allocate_reply_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz)
 &ioc->reply_dma);
 if (!ioc->reply)
 return -EAGAIN;
- if (!mpt3sas_check_same_4gb_region((long)ioc->reply_free, sz)) {
+ if (!mpt3sas_check_same_4gb_region(ioc->reply_dma, sz)) {
 dinitprintk(ioc, pr_err(
 "Bad Reply Pool! Reply (0x%p) Reply dma = (0x%llx)\n",
 ioc->reply, (unsigned long long) ioc->reply_dma));
@@ -5964,7 +5963,7 @@ _base_allocate_reply_free_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz)
 GFP_KERNEL, &ioc->reply_free_dma);
 if (!ioc->reply_free)
 return -EAGAIN;
- if (!mpt3sas_check_same_4gb_region((long)ioc->reply_free, sz)) {
+ if (!mpt3sas_check_same_4gb_region(ioc->reply_free_dma, sz)) {
 dinitprintk(ioc,
 pr_err("Bad Reply Free Pool! Reply Free (0x%p) Reply Free dma = (0x%llx)\n",
 ioc->reply_free, (unsigned long long) ioc->reply_free_dma));
@@ -6003,7 +6002,7 @@ _base_allocate_reply_post_free_array(struct MPT3SAS_ADAPTER *ioc,
 GFP_KERNEL, &ioc->reply_post_free_array_dma);
 if (!ioc->reply_post_free_array)
 return -EAGAIN;
- if (!mpt3sas_check_same_4gb_region((long)ioc->reply_post_free_array,
+ if (!mpt3sas_check_same_4gb_region(ioc->reply_post_free_array_dma,
 reply_post_free_array_sz)) {
 dinitprintk(ioc, pr_err(
 "Bad Reply Free Pool! Reply Free (0x%p) Reply Free dma = (0x%llx)\n",
@@ -6068,7 +6067,7 @@ base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz)
 * resources and set DMA mask to 32 and allocate.
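+ * Note: the same-4GiB test is done on the DMA (bus) address returned
+ * by the allocator, not on the kernel virtual address; only the bus
+ * address is subject to the controller's 32-bit window constraint.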
 */
 if (!mpt3sas_check_same_4gb_region(
- (long)ioc->reply_post[i].reply_post_free, sz)) {
+ ioc->reply_post[i].reply_post_free_dma, sz)) {
 dinitprintk(ioc,
 ioc_err(ioc, "bad Replypost free pool(0x%p)"
 "reply_post_free_dma = (0x%llx)\n",
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index 9ec310b795c33..d853e8d0195a6 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -1788,6 +1788,7 @@ static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha,
 ccb->device = pm8001_ha_dev;
 ccb->ccb_tag = ccb_tag;
 ccb->task = task;
+ ccb->n_elem = 0;
 circularQ = &pm8001_ha->inbnd_q_tbl[0];
@@ -1849,6 +1850,7 @@ static void pm8001_send_read_log(struct pm8001_hba_info *pm8001_ha,
 ccb->device = pm8001_ha_dev;
 ccb->ccb_tag = ccb_tag;
 ccb->task = task;
+ ccb->n_elem = 0;
 pm8001_ha_dev->id |= NCQ_READ_LOG_FLAG;
 pm8001_ha_dev->id |= NCQ_2ND_RLE_FLAG;
@@ -1865,7 +1867,7 @@ static void pm8001_send_read_log(struct pm8001_hba_info *pm8001_ha,
 sata_cmd.tag = cpu_to_le32(ccb_tag);
 sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id);
- sata_cmd.ncqtag_atap_dir_m |= ((0x1 << 7) | (0x5 << 9));
+ sata_cmd.ncqtag_atap_dir_m = cpu_to_le32((0x1 << 7) | (0x5 << 9));
 memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis));
 res = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sata_cmd,
@@ -2418,7 +2420,8 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
 len = sizeof(struct pio_setup_fis);
 pm8001_dbg(pm8001_ha, IO,
 "PIO read len = %d\n", len);
- } else if (t->ata_task.use_ncq) {
+ } else if (t->ata_task.use_ncq &&
+ t->data_dir != DMA_NONE) {
 len = sizeof(struct set_dev_bits_fis);
 pm8001_dbg(pm8001_ha, IO, "FPDMA len = %d\n",
 len);
@@ -4271,22 +4274,22 @@ static int pm8001_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
 u32 opc = OPC_INB_SATA_HOST_OPSTART;
 memset(&sata_cmd, 0, sizeof(sata_cmd));
 circularQ = &pm8001_ha->inbnd_q_tbl[0];
- if (task->data_dir == DMA_NONE) {
+
+ if (task->data_dir == DMA_NONE && !task->ata_task.use_ncq) {
 ATAP = 0x04; /* no data*/
 pm8001_dbg(pm8001_ha, IO, "no data\n");
 } else if (likely(!task->ata_task.device_control_reg_update)) {
- if (task->ata_task.dma_xfer) {
+ if (task->ata_task.use_ncq &&
+ dev->sata_dev.class != ATA_DEV_ATAPI) {
+ ATAP = 0x07; /* FPDMA */
+ pm8001_dbg(pm8001_ha, IO, "FPDMA\n");
+ } else if (task->ata_task.dma_xfer) {
 ATAP = 0x06; /* DMA */
 pm8001_dbg(pm8001_ha, IO, "DMA\n");
 } else {
 ATAP = 0x05; /* PIO*/
 pm8001_dbg(pm8001_ha, IO, "PIO\n");
 }
- if (task->ata_task.use_ncq &&
- dev->sata_dev.class != ATA_DEV_ATAPI) {
- ATAP = 0x07; /* FPDMA */
- pm8001_dbg(pm8001_ha, IO, "FPDMA\n");
- }
 }
 if (task->ata_task.use_ncq && pm8001_get_ncq_tag(task, &hdr_tag)) {
 task->ata_task.fis.sector_count |= (u8) (hdr_tag << 3);
@@ -4626,7 +4629,7 @@ int pm8001_chip_ssp_tm_req(struct pm8001_hba_info *pm8001_ha,
 memcpy(sspTMCmd.lun, task->ssp_task.LUN, 8);
 sspTMCmd.tag = cpu_to_le32(ccb->ccb_tag);
 if (pm8001_ha->chip_id != chip_8001)
- sspTMCmd.ds_ads_m = 0x08;
+ sspTMCmd.ds_ads_m = cpu_to_le32(0x08);
 circularQ = &pm8001_ha->inbnd_q_tbl[0];
 ret = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sspTMCmd,
 sizeof(sspTMCmd), 0);
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 9d20f8009b89f..908dbac20b483 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -1203,9 +1203,11 @@ pm80xx_set_thermal_config(struct pm8001_hba_info *pm8001_ha)
 else
 page_code = THERMAL_PAGE_CODE_8H;
- payload.cfg_pg[0] = (THERMAL_LOG_ENABLE << 9) |
- (THERMAL_ENABLE << 8) | page_code;
- payload.cfg_pg[1] = (LTEMPHIL << 24) | (RTEMPHIL << 8);
+ payload.cfg_pg[0] =
+ cpu_to_le32((THERMAL_LOG_ENABLE << 9) |
+ (THERMAL_ENABLE << 8) | page_code);
+ payload.cfg_pg[1] =
+ cpu_to_le32((LTEMPHIL << 24) | (RTEMPHIL << 8));
 pm8001_dbg(pm8001_ha, DEV,
 "Setting up thermal config. cfg_pg 0 0x%x cfg_pg 1 0x%x\n",
@@ -1245,43 +1247,41 @@ pm80xx_set_sas_protocol_timer_config(struct pm8001_hba_info *pm8001_ha)
 circularQ = &pm8001_ha->inbnd_q_tbl[0];
 payload.tag = cpu_to_le32(tag);
- SASConfigPage.pageCode = SAS_PROTOCOL_TIMER_CONFIG_PAGE;
- SASConfigPage.MST_MSI = 3 << 15;
- SASConfigPage.STP_SSP_MCT_TMO = (STP_MCT_TMO << 16) | SSP_MCT_TMO;
- SASConfigPage.STP_FRM_TMO = (SAS_MAX_OPEN_TIME << 24) |
- (SMP_MAX_CONN_TIMER << 16) | STP_FRM_TIMER;
- SASConfigPage.STP_IDLE_TMO = STP_IDLE_TIME;
-
- if (SASConfigPage.STP_IDLE_TMO > 0x3FFFFFF)
- SASConfigPage.STP_IDLE_TMO = 0x3FFFFFF;
-
-
- SASConfigPage.OPNRJT_RTRY_INTVL = (SAS_MFD << 16) |
- SAS_OPNRJT_RTRY_INTVL;
- SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO = (SAS_DOPNRJT_RTRY_TMO << 16)
- | SAS_COPNRJT_RTRY_TMO;
- SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR = (SAS_DOPNRJT_RTRY_THR << 16)
- | SAS_COPNRJT_RTRY_THR;
- SASConfigPage.MAX_AIP = SAS_MAX_AIP;
+ SASConfigPage.pageCode = cpu_to_le32(SAS_PROTOCOL_TIMER_CONFIG_PAGE);
+ SASConfigPage.MST_MSI = cpu_to_le32(3 << 15);
+ SASConfigPage.STP_SSP_MCT_TMO =
+ cpu_to_le32((STP_MCT_TMO << 16) | SSP_MCT_TMO);
+ SASConfigPage.STP_FRM_TMO =
+ cpu_to_le32((SAS_MAX_OPEN_TIME << 24) |
+ (SMP_MAX_CONN_TIMER << 16) | STP_FRM_TIMER);
+ SASConfigPage.STP_IDLE_TMO = cpu_to_le32(STP_IDLE_TIME);
+
+ SASConfigPage.OPNRJT_RTRY_INTVL =
+ cpu_to_le32((SAS_MFD << 16) | SAS_OPNRJT_RTRY_INTVL);
+ SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO =
+ cpu_to_le32((SAS_DOPNRJT_RTRY_TMO << 16) | SAS_COPNRJT_RTRY_TMO);
+ SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR =
+ cpu_to_le32((SAS_DOPNRJT_RTRY_THR << 16) | SAS_COPNRJT_RTRY_THR);
+ SASConfigPage.MAX_AIP = cpu_to_le32(SAS_MAX_AIP);
 pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.pageCode 0x%08x\n",
- SASConfigPage.pageCode);
+ le32_to_cpu(SASConfigPage.pageCode));
 pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.MST_MSI 0x%08x\n",
- SASConfigPage.MST_MSI);
+ le32_to_cpu(SASConfigPage.MST_MSI));
 pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.STP_SSP_MCT_TMO 0x%08x\n",
- SASConfigPage.STP_SSP_MCT_TMO);
+ le32_to_cpu(SASConfigPage.STP_SSP_MCT_TMO));
 pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.STP_FRM_TMO 0x%08x\n",
- SASConfigPage.STP_FRM_TMO);
+ le32_to_cpu(SASConfigPage.STP_FRM_TMO));
 pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.STP_IDLE_TMO 0x%08x\n",
- SASConfigPage.STP_IDLE_TMO);
+ le32_to_cpu(SASConfigPage.STP_IDLE_TMO));
 pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.OPNRJT_RTRY_INTVL 0x%08x\n",
- SASConfigPage.OPNRJT_RTRY_INTVL);
+ le32_to_cpu(SASConfigPage.OPNRJT_RTRY_INTVL));
 pm8001_dbg(pm8001_ha, INIT,
 "SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO 0x%08x\n",
- SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO);
+ le32_to_cpu(SASConfigPage.Data_Cmd_OPNRJT_RTRY_TMO));
 pm8001_dbg(pm8001_ha, INIT,
 "SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR 0x%08x\n",
- SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR);
+ le32_to_cpu(SASConfigPage.Data_Cmd_OPNRJT_RTRY_THR));
 pm8001_dbg(pm8001_ha, INIT, "SASConfigPage.MAX_AIP 0x%08x\n",
- SASConfigPage.MAX_AIP);
+ le32_to_cpu(SASConfigPage.MAX_AIP));
 memcpy(&payload.cfg_pg, &SASConfigPage,
 sizeof(SASProtocolTimerConfig_t));
@@ -1407,12 +1407,13 @@ static int pm80xx_encrypt_update(struct pm8001_hba_info *pm8001_ha)
 /* Currently only one key is used. New KEK index is 1.
 * Current KEK index is 1. Store KEK to NVRAM is 1.
 */
- payload.new_curidx_ksop = ((1 << 24) | (1 << 16) | (1 << 8) |
- KEK_MGMT_SUBOP_KEYCARDUPDATE);
+ payload.new_curidx_ksop =
+ cpu_to_le32(((1 << 24) | (1 << 16) | (1 << 8) |
+ KEK_MGMT_SUBOP_KEYCARDUPDATE));
 pm8001_dbg(pm8001_ha, DEV,
 "Saving Encryption info to flash. payload 0x%x\n",
- payload.new_curidx_ksop);
+ le32_to_cpu(payload.new_curidx_ksop));
 rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &payload,
 sizeof(payload), 0);
@@ -1801,6 +1802,7 @@ static void pm80xx_send_abort_all(struct pm8001_hba_info *pm8001_ha,
 ccb->device = pm8001_ha_dev;
 ccb->ccb_tag = ccb_tag;
 ccb->task = task;
+ ccb->n_elem = 0;
 circularQ = &pm8001_ha->inbnd_q_tbl[0];
@@ -1882,7 +1884,7 @@ static void pm80xx_send_read_log(struct pm8001_hba_info *pm8001_ha,
 sata_cmd.tag = cpu_to_le32(ccb_tag);
 sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id);
- sata_cmd.ncqtag_atap_dir_m_dad |= ((0x1 << 7) | (0x5 << 9));
+ sata_cmd.ncqtag_atap_dir_m_dad = cpu_to_le32(((0x1 << 7) | (0x5 << 9)));
 memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis));
 res = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &sata_cmd,
@@ -2510,7 +2512,8 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha,
 len = sizeof(struct pio_setup_fis);
 pm8001_dbg(pm8001_ha, IO,
 "PIO read len = %d\n", len);
- } else if (t->ata_task.use_ncq) {
+ } else if (t->ata_task.use_ncq &&
+ t->data_dir != DMA_NONE) {
 len = sizeof(struct set_dev_bits_fis);
 pm8001_dbg(pm8001_ha, IO, "FPDMA len = %d\n",
 len);
@@ -4379,13 +4382,15 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
 struct ssp_ini_io_start_req ssp_cmd;
 u32 tag = ccb->ccb_tag;
 int ret;
- u64 phys_addr, start_addr, end_addr;
+ u64 phys_addr, end_addr;
 u32 end_addr_high, end_addr_low;
 struct inbound_queue_table *circularQ;
 u32 q_index, cpu_id;
 u32 opc = OPC_INB_SSPINIIOSTART;
+
 memset(&ssp_cmd, 0, sizeof(ssp_cmd));
 memcpy(ssp_cmd.ssp_iu.lun, task->ssp_task.LUN, 8);
+
 /* data address domain added for spcv; set to 0 by host,
 * used internally by controller
 * 0 for SAS 1.1 and SAS 2.0 compatible TLR
@@ -4396,7 +4401,7 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
 ssp_cmd.device_id = cpu_to_le32(pm8001_dev->device_id);
 ssp_cmd.tag = cpu_to_le32(tag);
 if (task->ssp_task.enable_first_burst)
- ssp_cmd.ssp_iu.efb_prio_attr |= 0x80;
+ ssp_cmd.ssp_iu.efb_prio_attr = 0x80;
 ssp_cmd.ssp_iu.efb_prio_attr |= (task->ssp_task.task_prio << 3);
 ssp_cmd.ssp_iu.efb_prio_attr |= (task->ssp_task.task_attr & 7);
 memcpy(ssp_cmd.ssp_iu.cdb, task->ssp_task.cmd->cmnd,
@@ -4428,21 +4433,24 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
 ssp_cmd.enc_esgl = cpu_to_le32(1<<31);
 } else if (task->num_scatter == 1) {
 u64 dma_addr = sg_dma_address(task->scatter);
+
 ssp_cmd.enc_addr_low =
 cpu_to_le32(lower_32_bits(dma_addr));
 ssp_cmd.enc_addr_high =
 cpu_to_le32(upper_32_bits(dma_addr));
 ssp_cmd.enc_len = cpu_to_le32(task->total_xfer_len);
 ssp_cmd.enc_esgl = 0;
+
 /* Check 4G Boundary */
- start_addr = cpu_to_le64(dma_addr);
- end_addr = (start_addr + ssp_cmd.enc_len) - 1;
- end_addr_low = cpu_to_le32(lower_32_bits(end_addr));
- end_addr_high = cpu_to_le32(upper_32_bits(end_addr));
- if (end_addr_high != ssp_cmd.enc_addr_high) {
+ end_addr = dma_addr + le32_to_cpu(ssp_cmd.enc_len) - 1;
+ end_addr_low = lower_32_bits(end_addr);
+ end_addr_high = upper_32_bits(end_addr);
+
+ if (end_addr_high != le32_to_cpu(ssp_cmd.enc_addr_high)) {
 pm8001_dbg(pm8001_ha, FAIL,
 "The sg list address start_addr=0x%016llx data_len=0x%x end_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G boundary\n",
- start_addr, ssp_cmd.enc_len,
+ dma_addr,
+ le32_to_cpu(ssp_cmd.enc_len),
 end_addr_high, end_addr_low);
 pm8001_chip_make_sg(task->scatter, 1,
 ccb->buf_prd);
@@ -4451,7 +4459,7 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
 cpu_to_le32(lower_32_bits(phys_addr));
 ssp_cmd.enc_addr_high =
 cpu_to_le32(upper_32_bits(phys_addr));
- ssp_cmd.enc_esgl = cpu_to_le32(1<<31);
+ ssp_cmd.enc_esgl = cpu_to_le32(1U<<31);
 }
 } else if (task->num_scatter == 0) {
 ssp_cmd.enc_addr_low = 0;
@@ -4459,8 +4467,10 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
 ssp_cmd.enc_len = cpu_to_le32(task->total_xfer_len);
 ssp_cmd.enc_esgl = 0;
 }
+
 /* XTS mode. All other fields are 0 */
- ssp_cmd.key_cmode = 0x6 << 4;
+ ssp_cmd.key_cmode = cpu_to_le32(0x6 << 4);
+
 /* set tweak values. Should be the start lba */
 ssp_cmd.twk_val0 = cpu_to_le32((task->ssp_task.cmd->cmnd[2] << 24) |
 (task->ssp_task.cmd->cmnd[3] << 16) |
@@ -4482,20 +4492,22 @@ static int pm80xx_chip_ssp_io_req(struct pm8001_hba_info *pm8001_ha,
 ssp_cmd.esgl = cpu_to_le32(1<<31);
 } else if (task->num_scatter == 1) {
 u64 dma_addr = sg_dma_address(task->scatter);
+
 ssp_cmd.addr_low = cpu_to_le32(lower_32_bits(dma_addr));
 ssp_cmd.addr_high =
 cpu_to_le32(upper_32_bits(dma_addr));
 ssp_cmd.len = cpu_to_le32(task->total_xfer_len);
 ssp_cmd.esgl = 0;
+
 /* Check 4G Boundary */
- start_addr = cpu_to_le64(dma_addr);
- end_addr = (start_addr + ssp_cmd.len) - 1;
- end_addr_low = cpu_to_le32(lower_32_bits(end_addr));
- end_addr_high = cpu_to_le32(upper_32_bits(end_addr));
- if (end_addr_high != ssp_cmd.addr_high) {
+ end_addr = dma_addr + le32_to_cpu(ssp_cmd.len) - 1;
+ end_addr_low = lower_32_bits(end_addr);
+ end_addr_high = upper_32_bits(end_addr);
+ if (end_addr_high != le32_to_cpu(ssp_cmd.addr_high)) {
 pm8001_dbg(pm8001_ha, FAIL,
 "The sg list address start_addr=0x%016llx data_len=0x%x end_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G boundary\n",
- start_addr, ssp_cmd.len,
+ dma_addr,
+ le32_to_cpu(ssp_cmd.len),
 end_addr_high, end_addr_low);
 pm8001_chip_make_sg(task->scatter, 1,
 ccb->buf_prd);
@@ -4530,7 +4542,7 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
 u32 q_index, cpu_id;
 struct sata_start_req sata_cmd;
 u32 hdr_tag, ncg_tag = 0;
- u64 phys_addr, start_addr, end_addr;
+ u64 phys_addr, end_addr;
 u32 end_addr_high, end_addr_low;
 u32 ATAP = 0x0;
 u32 dir;
@@ -4542,22 +4554,21 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
 q_index = (u32) (cpu_id) % (pm8001_ha->max_q_num);
 circularQ = &pm8001_ha->inbnd_q_tbl[q_index];
- if (task->data_dir == DMA_NONE) {
+ if (task->data_dir == DMA_NONE && !task->ata_task.use_ncq) {
 ATAP = 0x04; /* no data*/
 pm8001_dbg(pm8001_ha, IO, "no data\n");
 } else if (likely(!task->ata_task.device_control_reg_update)) {
- if (task->ata_task.dma_xfer) {
+ if (task->ata_task.use_ncq &&
+ dev->sata_dev.class != ATA_DEV_ATAPI) {
+ ATAP = 0x07; /* FPDMA */
+ pm8001_dbg(pm8001_ha, IO, "FPDMA\n");
+ } else if (task->ata_task.dma_xfer) {
 ATAP = 0x06; /* DMA */
 pm8001_dbg(pm8001_ha, IO, "DMA\n");
 } else {
 ATAP = 0x05; /* PIO*/
 pm8001_dbg(pm8001_ha, IO, "PIO\n");
 }
- if (task->ata_task.use_ncq &&
- dev->sata_dev.class != ATA_DEV_ATAPI) {
- ATAP = 0x07; /* FPDMA */
- pm8001_dbg(pm8001_ha, IO, "FPDMA\n");
- }
 }
 if (task->ata_task.use_ncq && pm8001_get_ncq_tag(task, &hdr_tag)) {
 task->ata_task.fis.sector_count |= (u8) (hdr_tag << 3);
@@ -4591,32 +4602,38 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
 pm8001_chip_make_sg(task->scatter,
 ccb->n_elem, ccb->buf_prd);
 phys_addr = ccb->ccb_dma_handle;
- sata_cmd.enc_addr_low = lower_32_bits(phys_addr);
- sata_cmd.enc_addr_high = upper_32_bits(phys_addr);
+ sata_cmd.enc_addr_low =
+ cpu_to_le32(lower_32_bits(phys_addr));
+ sata_cmd.enc_addr_high =
+ cpu_to_le32(upper_32_bits(phys_addr));
 sata_cmd.enc_esgl = cpu_to_le32(1 << 31);
 } else if (task->num_scatter == 1) {
 u64 dma_addr = sg_dma_address(task->scatter);
- sata_cmd.enc_addr_low = lower_32_bits(dma_addr);
- sata_cmd.enc_addr_high = upper_32_bits(dma_addr);
+
+ sata_cmd.enc_addr_low =
+ cpu_to_le32(lower_32_bits(dma_addr));
+ sata_cmd.enc_addr_high =
+ cpu_to_le32(upper_32_bits(dma_addr));
 sata_cmd.enc_len = cpu_to_le32(task->total_xfer_len);
 sata_cmd.enc_esgl = 0;
+
 /* Check 4G Boundary */
- start_addr = cpu_to_le64(dma_addr);
- end_addr = (start_addr + sata_cmd.enc_len) - 1;
- end_addr_low = cpu_to_le32(lower_32_bits(end_addr));
- end_addr_high = cpu_to_le32(upper_32_bits(end_addr));
- if (end_addr_high != sata_cmd.enc_addr_high) {
+ end_addr = dma_addr + le32_to_cpu(sata_cmd.enc_len) - 1;
+ end_addr_low = lower_32_bits(end_addr);
+ end_addr_high = upper_32_bits(end_addr);
+ if (end_addr_high != le32_to_cpu(sata_cmd.enc_addr_high)) {
 pm8001_dbg(pm8001_ha, FAIL,
 "The sg list address start_addr=0x%016llx data_len=0x%x end_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G boundary\n",
- start_addr, sata_cmd.enc_len,
+ dma_addr,
+ le32_to_cpu(sata_cmd.enc_len),
 end_addr_high, end_addr_low);
 pm8001_chip_make_sg(task->scatter, 1,
 ccb->buf_prd);
 phys_addr = ccb->ccb_dma_handle;
 sata_cmd.enc_addr_low =
- lower_32_bits(phys_addr);
+ cpu_to_le32(lower_32_bits(phys_addr));
 sata_cmd.enc_addr_high =
- upper_32_bits(phys_addr);
+ cpu_to_le32(upper_32_bits(phys_addr));
 sata_cmd.enc_esgl =
 cpu_to_le32(1 << 31);
 }
@@ -4627,7 +4644,8 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
 sata_cmd.enc_esgl = 0;
 }
 /* XTS mode. All other fields are 0 */
- sata_cmd.key_index_mode = 0x6 << 4;
+ sata_cmd.key_index_mode = cpu_to_le32(0x6 << 4);
+
 /* set tweak values. Should be the start lba */
 sata_cmd.twk_val0 =
 cpu_to_le32((sata_cmd.sata_fis.lbal_exp << 24) |
@@ -4653,31 +4671,31 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
 phys_addr = ccb->ccb_dma_handle;
 sata_cmd.addr_low = lower_32_bits(phys_addr);
 sata_cmd.addr_high = upper_32_bits(phys_addr);
- sata_cmd.esgl = cpu_to_le32(1 << 31);
+ sata_cmd.esgl = cpu_to_le32(1U << 31);
 } else if (task->num_scatter == 1) {
 u64 dma_addr = sg_dma_address(task->scatter);
+
 sata_cmd.addr_low = lower_32_bits(dma_addr);
 sata_cmd.addr_high = upper_32_bits(dma_addr);
 sata_cmd.len = cpu_to_le32(task->total_xfer_len);
 sata_cmd.esgl = 0;
+
 /* Check 4G Boundary */
- start_addr = cpu_to_le64(dma_addr);
- end_addr = (start_addr + sata_cmd.len) - 1;
- end_addr_low = cpu_to_le32(lower_32_bits(end_addr));
- end_addr_high = cpu_to_le32(upper_32_bits(end_addr));
+ end_addr = dma_addr + le32_to_cpu(sata_cmd.len) - 1;
+ end_addr_low = lower_32_bits(end_addr);
+ end_addr_high = upper_32_bits(end_addr);
 if (end_addr_high != sata_cmd.addr_high) {
 pm8001_dbg(pm8001_ha, FAIL,
 "The sg list address start_addr=0x%016llx data_len=0x%xend_addr_high=0x%08x end_addr_low=0x%08x has crossed 4G boundary\n",
- start_addr, sata_cmd.len,
+ dma_addr,
+ le32_to_cpu(sata_cmd.len),
 end_addr_high, end_addr_low);
 pm8001_chip_make_sg(task->scatter, 1,
 ccb->buf_prd);
 phys_addr = ccb->ccb_dma_handle;
- sata_cmd.addr_low =
- lower_32_bits(phys_addr);
- sata_cmd.addr_high =
- upper_32_bits(phys_addr);
- sata_cmd.esgl = cpu_to_le32(1 << 31);
+ sata_cmd.addr_low = lower_32_bits(phys_addr);
+ sata_cmd.addr_high = upper_32_bits(phys_addr);
+ sata_cmd.esgl = cpu_to_le32(1U << 31);
 }
 } else if (task->num_scatter == 0) {
 sata_cmd.addr_low = 0;
@@ -4685,27 +4703,28 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
 sata_cmd.len = cpu_to_le32(task->total_xfer_len);
 sata_cmd.esgl = 0;
 }
+
 /* scsi cdb */
 sata_cmd.atapi_scsi_cdb[0] =
 cpu_to_le32(((task->ata_task.atapi_packet[0]) |
- (task->ata_task.atapi_packet[1] << 8) |
- (task->ata_task.atapi_packet[2] << 16) |
- (task->ata_task.atapi_packet[3] << 24)));
+ (task->ata_task.atapi_packet[1] << 8) |
+ (task->ata_task.atapi_packet[2] << 16) |
+ (task->ata_task.atapi_packet[3] << 24)));
 sata_cmd.atapi_scsi_cdb[1] =
 cpu_to_le32(((task->ata_task.atapi_packet[4]) |
- (task->ata_task.atapi_packet[5] << 8) |
- (task->ata_task.atapi_packet[6] << 16) |
- (task->ata_task.atapi_packet[7] << 24)));
+ (task->ata_task.atapi_packet[5] << 8) |
+ (task->ata_task.atapi_packet[6] << 16) |
+ (task->ata_task.atapi_packet[7] << 24)));
 sata_cmd.atapi_scsi_cdb[2] =
 cpu_to_le32(((task->ata_task.atapi_packet[8]) |
- (task->ata_task.atapi_packet[9] << 8) |
- (task->ata_task.atapi_packet[10] << 16) |
- (task->ata_task.atapi_packet[11] << 24)));
+ (task->ata_task.atapi_packet[9] << 8) |
+ (task->ata_task.atapi_packet[10] << 16) |
+ (task->ata_task.atapi_packet[11] << 24)));
 sata_cmd.atapi_scsi_cdb[3] =
 cpu_to_le32(((task->ata_task.atapi_packet[12]) |
- (task->ata_task.atapi_packet[13] << 8) |
- (task->ata_task.atapi_packet[14] << 16) |
- (task->ata_task.atapi_packet[15] << 24)));
+ (task->ata_task.atapi_packet[13] << 8) |
+ (task->ata_task.atapi_packet[14] << 16) |
+ (task->ata_task.atapi_packet[15] << 24)));
 }
 /* Check for read log for failed drive and return */
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index db55737000ab5..3b3e4234f37a0 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -555,7 +555,7 @@ qla2x00_sysfs_read_vpd(struct file *filp, struct kobject *kobj,
 if (!capable(CAP_SYS_ADMIN))
 return -EINVAL;
- if (IS_NOCACHE_VPD_TYPE(ha))
+ if (!IS_NOCACHE_VPD_TYPE(ha))
 goto skip;
 faddr = ha->flt_region_vpd << 2;
@@ -745,7 +745,7 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj,
 ql_log(ql_log_info, vha, 0x706f,
 "Issuing MPI reset.\n");
- if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
+ if (IS_QLA83XX(ha)) {
 uint32_t idc_control;
 qla83xx_idc_lock(vha, 0);
@@ -1056,9 +1056,6 @@ qla2x00_free_sysfs_attr(scsi_qla_host_t *vha, bool stop_beacon)
 continue;
 if (iter->type == 3 && !(IS_CNA_CAPABLE(ha)))
 continue;
- if (iter->type == 0x27 &&
- (!IS_QLA27XX(ha) || !IS_QLA28XX(ha)))
- continue;
 sysfs_remove_bin_file(&host->shost_gendev.kobj,
 iter->attr);
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 9da8034ccad40..c2f00f076f799 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -29,7 +29,8 @@ void qla2x00_bsg_job_done(srb_t *sp, int res)
 "%s: sp hdl %x, result=%x bsg ptr %p\n",
 __func__, sp->handle, res, bsg_job);
- sp->free(sp);
+ /* ref: INIT */
+ kref_put(&sp->cmd_kref, qla2x00_sp_release);
 bsg_reply->result = res;
 bsg_job_done(bsg_job, bsg_reply->result,
@@ -3013,7 +3014,8 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
 done:
 spin_unlock_irqrestore(&ha->hardware_lock, flags);
- sp->free(sp);
+ /* ref: INIT */
+ kref_put(&sp->cmd_kref, qla2x00_sp_release);
 return 0;
 }
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 9ebf4a234d9a9..aefb29d7c7aee 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -726,6 +726,11 @@ typedef struct srb {
 * code.
 */
 void (*put_fn)(struct kref *kref);
+
+ /*
+ * Report completion for asynchronous commands.
+ */
+ void (*async_done)(struct srb *sp, int res);
 } srb_t;
 #define GET_CMD_SP(sp) (sp->u.scmd.cmd)
@@ -2886,7 +2891,11 @@ struct ct_fdmi2_hba_attributes {
 #define FDMI_PORT_SPEED_8GB 0x10
 #define FDMI_PORT_SPEED_16GB 0x20
 #define FDMI_PORT_SPEED_32GB 0x40
-#define FDMI_PORT_SPEED_64GB 0x80
+#define FDMI_PORT_SPEED_20GB 0x80
+#define FDMI_PORT_SPEED_40GB 0x100
+#define FDMI_PORT_SPEED_128GB 0x200
+#define FDMI_PORT_SPEED_64GB 0x400
+#define FDMI_PORT_SPEED_256GB 0x800
 #define FDMI_PORT_SPEED_UNKNOWN 0x8000
 #define FC_CLASS_2 0x04
@@ -4262,8 +4271,10 @@ struct qla_hw_data {
 #define QLA_ABTS_WAIT_ENABLED(_sp) \
 (QLA_NVME_IOS(_sp) && QLA_ABTS_FW_ENABLED(_sp->fcport->vha->hw))
-#define IS_PI_UNINIT_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha))
-#define IS_PI_IPGUARD_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha))
+#define IS_PI_UNINIT_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
+ IS_QLA28XX(ha))
+#define IS_PI_IPGUARD_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
+ IS_QLA28XX(ha))
 #define IS_PI_DIFB_DIX0_CAPABLE(ha) (0)
 #define IS_PI_SPLIT_DET_CAPABLE_HBA(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
 IS_QLA28XX(ha))
@@ -4610,6 +4621,7 @@ struct qla_hw_data {
 struct workqueue_struct *wq;
 struct work_struct heartbeat_work;
 struct qlfc_fw fw_buf;
+ unsigned long last_heartbeat_run_jiffies;
 /* FCP_CMND priority support */
 struct qla_fcp_prio_cfg *fcp_prio_cfg;
@@ -5427,4 +5439,8 @@ struct ql_vnd_tgt_stats_resp {
 #include "qla_gbl.h"
 #include "qla_dbg.h"
 #include "qla_inline.h"
+
+#define IS_SESSION_DELETED(_fcport) (_fcport->disc_state == DSC_DELETE_PEND || \
+ _fcport->disc_state == DSC_DELETED)
+
 #endif
diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c
index 53d2b85620271..0628633c7c7e9 100644
--- a/drivers/scsi/qla2xxx/qla_edif.c
+++ b/drivers/scsi/qla2xxx/qla_edif.c
@@ -668,6 +668,11 @@ qla_edif_app_authok(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
 bsg_job->request_payload.sg_cnt, &appplogiok,
 sizeof(struct auth_complete_cmd));
+ /* silent unaligned access warning */
+ portid.b.domain = appplogiok.u.d_id.b.domain;
+ portid.b.area = appplogiok.u.d_id.b.area;
+ portid.b.al_pa = appplogiok.u.d_id.b.al_pa;
+
 switch (appplogiok.type) {
 case PL_TYPE_WWPN:
 fcport = qla2x00_find_fcport_by_wwpn(vha,
@@ -678,7 +683,7 @@ qla_edif_app_authok(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
 __func__, appplogiok.u.wwpn);
 break;
 case PL_TYPE_DID:
- fcport = qla2x00_find_fcport_by_pid(vha, &appplogiok.u.d_id);
+ fcport = qla2x00_find_fcport_by_pid(vha, &portid);
 if (!fcport)
 ql_dbg(ql_dbg_edif, vha, 0x911d,
 "%s d_id lookup failed: %x\n", __func__,
@@ -777,6 +782,11 @@ qla_edif_app_authfail(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
 bsg_job->request_payload.sg_cnt, &appplogifail,
 sizeof(struct auth_complete_cmd));
+ /* silent unaligned access warning */
+ portid.b.domain = appplogifail.u.d_id.b.domain;
+ portid.b.area = appplogifail.u.d_id.b.area;
+ portid.b.al_pa = appplogifail.u.d_id.b.al_pa;
+
 /*
 * TODO: edif: app has failed this plogi. Inform driver to
 * take any action (if any).
@@ -788,7 +798,7 @@ qla_edif_app_authfail(scsi_qla_host_t *vha, struct bsg_job *bsg_job) SET_DID_STATUS(bsg_reply->result, DID_OK); break; case PL_TYPE_DID: - fcport = qla2x00_find_fcport_by_pid(vha, &appplogifail.u.d_id); + fcport = qla2x00_find_fcport_by_pid(vha, &portid); if (!fcport) ql_dbg(ql_dbg_edif, vha, 0x911d, "%s d_id lookup failed: %x\n", __func__, @@ -1253,6 +1263,7 @@ qla24xx_sadb_update(struct bsg_job *bsg_job) int result = 0; struct qla_sa_update_frame sa_frame; struct srb_iocb *iocb_cmd; + port_id_t portid; ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x911d, "%s entered, vha: 0x%p\n", __func__, vha); @@ -1276,7 +1287,12 @@ qla24xx_sadb_update(struct bsg_job *bsg_job) goto done; } - fcport = qla2x00_find_fcport_by_pid(vha, &sa_frame.port_id); + /* silence unaligned access warning */ + portid.b.domain = sa_frame.port_id.b.domain; + portid.b.area = sa_frame.port_id.b.area; + portid.b.al_pa = sa_frame.port_id.b.al_pa; + + fcport = qla2x00_find_fcport_by_pid(vha, &portid); if (fcport) { found = 1; if (sa_frame.flags == QLA_SA_UPDATE_FLAGS_TX_KEY) @@ -2146,7 +2162,8 @@ edif_doorbell_show(struct device *dev, struct device_attribute *attr, static void qla_noop_sp_done(srb_t *sp, int res) { - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } /* diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 8d8503a284790..3f8b8bbabe6de 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -316,7 +316,8 @@ extern int qla2x00_start_sp(srb_t *); extern int qla24xx_dif_start_scsi(srb_t *); extern int qla2x00_start_bidir(srb_t *, struct scsi_qla_host *, uint32_t); extern int qla2xxx_dif_start_scsi_mq(srb_t *); -extern void qla2x00_init_timer(srb_t *sp, unsigned long tmo); +extern void qla2x00_init_async_sp(srb_t *sp, unsigned long tmo, + void (*done)(struct srb *, int)); extern unsigned long qla2x00_get_async_timeout(struct scsi_qla_host *); extern void *qla2x00_alloc_iocbs(struct scsi_qla_host *, srb_t *); @@ -332,6 +333,7 @@ extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *); extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *); extern int qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e); +void qla2x00_sp_release(struct kref *kref); /* * Global Function Prototypes in qla_mbx.c source file.
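The conversions in the surrounding hunks all follow one scheme: every former sp->free(sp) call site now drops a counted reference with kref_put(&sp->cmd_kref, qla2x00_sp_release), and the "/* ref: INIT */" and "/* ref: TMR */" comments name which reference is being dropped. A minimal, hypothetical sketch of that lifecycle, for orientation only (the sketch_* names are invented; kref_init(), kref_get() and kref_put() are the real <linux/kref.h> API)::

    #include <linux/kernel.h>
    #include <linux/kref.h>
    #include <linux/slab.h>

    struct sketch_srb {
            struct kref cmd_kref;
    };

    /* Runs exactly once, when the last holder drops its reference. */
    static void sketch_sp_release(struct kref *kref)
    {
            struct sketch_srb *sp = container_of(kref, struct sketch_srb, cmd_kref);

            kfree(sp);
    }

    static struct sketch_srb *sketch_get_sp(void)
    {
            struct sketch_srb *sp = kzalloc(sizeof(*sp), GFP_KERNEL);

            if (sp)
                    kref_init(&sp->cmd_kref);       /* ref: INIT, taken at allocation */
            return sp;
    }

    static void sketch_arm_timer(struct sketch_srb *sp)
    {
            kref_get(&sp->cmd_kref);        /* ref: TMR, taken when the timer is armed */
    }

    static void sketch_put(struct sketch_srb *sp)
    {
            /* whichever path finishes last (completion or timeout) frees the srb */
            kref_put(&sp->cmd_kref, sketch_sp_release);
    }

The timer race is handled by qla2x00_async_done() in the qla_iocb.c hunk further down: when del_timer() succeeds the timeout has not run, so the completion path itself drops the TMR reference and skips the caller's async_done() if that put released the srb; when del_timer() fails, qla2x00_sp_timeout() has already fired and drops the TMR reference after calling the timeout handler.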
diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 28b574e20ef32..6b67bd561810d 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -529,7 +529,6 @@ static void qla2x00_async_sns_sp_done(srb_t *sp, int rc) if (!e) goto err2; - del_timer(&sp->u.iocb_cmd.timer); e->u.iosb.sp = sp; qla2x00_post_work(vha, e); return; @@ -556,8 +555,8 @@ static void qla2x00_async_sns_sp_done(srb_t *sp, int rc) sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - sp->free(sp); - + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return; } @@ -592,13 +591,15 @@ static int qla_async_rftid(scsi_qla_host_t *vha, port_id_t *d_id) if (!vha->flags.online) goto done; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_CT_PTHRU_CMD; sp->name = "rft_id"; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_sns_sp_done); sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma, @@ -638,8 +639,6 @@ static int qla_async_rftid(scsi_qla_host_t *vha, port_id_t *d_id) sp->u.iocb_cmd.u.ctarg.req_size = RFT_ID_REQ_SIZE; sp->u.iocb_cmd.u.ctarg.rsp_size = RFT_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla2x00_async_sns_sp_done; ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - hdl=%x portid %06x.\n", @@ -653,7 +652,8 @@ static int qla_async_rftid(scsi_qla_host_t *vha, port_id_t *d_id) } return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -676,8 +676,7 @@ qla2x00_rff_id(scsi_qla_host_t *vha, u8 type) return (QLA_SUCCESS); } - return qla_async_rffid(vha, &vha->d_id, qlt_rff_id(vha), - FC4_TYPE_FCP_SCSI); + return qla_async_rffid(vha, &vha->d_id, qlt_rff_id(vha), type); } static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id, @@ -688,13 +687,15 @@ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id, srb_t *sp; struct ct_sns_pkt *ct_sns; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_CT_PTHRU_CMD; sp->name = "rff_id"; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_sns_sp_done); sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma, @@ -727,13 +728,11 @@ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id, /* Prepare CT arguments -- port_id, FC-4 feature, FC-4 type */ ct_req->req.rff_id.port_id = port_id_to_be_id(*d_id); ct_req->req.rff_id.fc4_feature = fc4feature; - ct_req->req.rff_id.fc4_type = fc4type; /* SCSI - FCP */ + ct_req->req.rff_id.fc4_type = fc4type; /* SCSI-FCP or FC-NVMe */ sp->u.iocb_cmd.u.ctarg.req_size = RFF_ID_REQ_SIZE; sp->u.iocb_cmd.u.ctarg.rsp_size = RFF_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla2x00_async_sns_sp_done; ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - hdl=%x portid %06x feature %x type %x.\n", @@ -749,7 +748,8 @@ static int qla_async_rffid(scsi_qla_host_t *vha, port_id_t *d_id, return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -779,13 +779,15 @@ static int 
qla_async_rnnid(scsi_qla_host_t *vha, port_id_t *d_id, srb_t *sp; struct ct_sns_pkt *ct_sns; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_CT_PTHRU_CMD; sp->name = "rnid"; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_sns_sp_done); sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma, @@ -823,9 +825,6 @@ static int qla_async_rnnid(scsi_qla_host_t *vha, port_id_t *d_id, sp->u.iocb_cmd.u.ctarg.rsp_size = RNN_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla2x00_async_sns_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - hdl=%x portid %06x\n", sp->name, sp->handle, d_id->b24); @@ -840,7 +839,8 @@ static int qla_async_rnnid(scsi_qla_host_t *vha, port_id_t *d_id, return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -886,13 +886,15 @@ static int qla_async_rsnn_nn(scsi_qla_host_t *vha) srb_t *sp; struct ct_sns_pkt *ct_sns; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_CT_PTHRU_CMD; sp->name = "rsnn_nn"; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_sns_sp_done); sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma, @@ -936,9 +938,6 @@ static int qla_async_rsnn_nn(scsi_qla_host_t *vha) sp->u.iocb_cmd.u.ctarg.rsp_size = RSNN_NN_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla2x00_async_sns_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - hdl=%x.\n", sp->name, sp->handle); @@ -953,7 +952,8 @@ static int qla_async_rsnn_nn(scsi_qla_host_t *vha) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -2893,7 +2893,8 @@ static void qla24xx_async_gpsc_sp_done(srb_t *sp, int res) qla24xx_handle_gpsc_event(vha, &ea); done: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) @@ -2905,6 +2906,7 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) return rval; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -2913,8 +2915,8 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) sp->name = "gpsc"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla24xx_async_gpsc_sp_done); /* CT_IU preamble */ ct_req = qla24xx_prep_ct_fm_req(fcport->ct_desc.ct_sns, GPSC_CMD, @@ -2932,9 +2934,6 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) sp->u.iocb_cmd.u.ctarg.rsp_size = GPSC_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = vha->mgmt_svr_loop_id; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla24xx_async_gpsc_sp_done; - ql_dbg(ql_dbg_disc, vha, 0x205e, "Async-%s %8phC hdl=%x loopid=%x portid=%02x%02x%02x.\n", sp->name, fcport->port_name, sp->handle, @@ -2947,7 +2946,8 @@ 
int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -2996,7 +2996,8 @@ void qla24xx_sp_unmap(scsi_qla_host_t *vha, srb_t *sp) break; } - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea) @@ -3135,13 +3136,15 @@ static void qla2x00_async_gpnid_sp_done(srb_t *sp, int res) if (res) { if (res == QLA_FUNCTION_TIMEOUT) { qla24xx_post_gpnid_work(sp->vha, &ea.id); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return; } } else if (sp->gen1) { /* There was another RSCN for this Nport ID */ qla24xx_post_gpnid_work(sp->vha, &ea.id); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return; } @@ -3162,7 +3165,8 @@ static void qla2x00_async_gpnid_sp_done(srb_t *sp, int res) sp->u.iocb_cmd.u.ctarg.rsp_dma); sp->u.iocb_cmd.u.ctarg.rsp = NULL; - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return; } @@ -3182,6 +3186,7 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) if (!vha->flags.online) goto done; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; @@ -3190,14 +3195,16 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) sp->name = "gpnid"; sp->u.iocb_cmd.u.ctarg.id = *id; sp->gen1 = 0; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_gpnid_sp_done); spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); list_for_each_entry(tsp, &vha->gpnid_list, elem) { if (tsp->u.iocb_cmd.u.ctarg.id.b24 == id->b24) { tsp->gen1++; spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); goto done; } } @@ -3238,9 +3245,6 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) sp->u.iocb_cmd.u.ctarg.rsp_size = GPN_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - sp->done = qla2x00_async_gpnid_sp_done; - ql_dbg(ql_dbg_disc, vha, 0x2067, "Async-%s hdl=%x ID %3phC.\n", sp->name, sp->handle, &ct_req->req.port_id.port_id); @@ -3270,8 +3274,8 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) sp->u.iocb_cmd.u.ctarg.rsp_dma); sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -3326,7 +3330,8 @@ void qla24xx_async_gffid_sp_done(srb_t *sp, int res) ea.rc = res; qla24xx_handle_gffid_event(vha, &ea); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } /* Get FC4 Feature with Nport ID. 
*/ @@ -3339,6 +3344,7 @@ int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport) if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) return rval; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) return rval; @@ -3348,9 +3354,8 @@ int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport) sp->name = "gffid"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla24xx_async_gffid_sp_done); /* CT_IU preamble */ ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GFF_ID_CMD, @@ -3368,8 +3373,6 @@ int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport) sp->u.iocb_cmd.u.ctarg.rsp_size = GFF_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->done = qla24xx_async_gffid_sp_done; - ql_dbg(ql_dbg_disc, vha, 0x2132, "Async-%s hdl=%x %8phC.\n", sp->name, sp->handle, fcport->port_name); @@ -3380,7 +3383,8 @@ int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; return rval; } @@ -3767,7 +3771,6 @@ static void qla2x00_async_gpnft_gnnft_sp_done(srb_t *sp, int res) "Async done-%s res %x FC4Type %x\n", sp->name, res, sp->gen2); - del_timer(&sp->u.iocb_cmd.timer); sp->rc = res; if (res) { unsigned long flags; @@ -3892,9 +3895,8 @@ static int qla24xx_async_gnnft(scsi_qla_host_t *vha, struct srb *sp, sp->name = "gnnft"; sp->gen1 = vha->hw->base_qpair->chip_reset; sp->gen2 = fc4_type; - - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_gpnft_gnnft_sp_done); memset(sp->u.iocb_cmd.u.ctarg.rsp, 0, sp->u.iocb_cmd.u.ctarg.rsp_size); memset(sp->u.iocb_cmd.u.ctarg.req, 0, sp->u.iocb_cmd.u.ctarg.req_size); @@ -3910,8 +3912,6 @@ static int qla24xx_async_gnnft(scsi_qla_host_t *vha, struct srb *sp, sp->u.iocb_cmd.u.ctarg.req_size = GNN_FT_REQ_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->done = qla2x00_async_gpnft_gnnft_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s hdl=%x FC4Type %x.\n", sp->name, sp->handle, ct_req->req.gpn_ft.port_type); @@ -3938,8 +3938,8 @@ static int qla24xx_async_gnnft(scsi_qla_host_t *vha, struct srb *sp, sp->u.iocb_cmd.u.ctarg.rsp_dma); sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); spin_lock_irqsave(&vha->work_lock, flags); vha->scan.scan_flags &= ~SF_SCANNING; @@ -3991,9 +3991,12 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp) ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff, "%s: Performing FCP Scan\n", __func__); - if (sp) - sp->free(sp); /* should not happen */ + if (sp) { + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); + } + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) { spin_lock_irqsave(&vha->work_lock, flags); @@ -4038,6 +4041,7 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp) sp->u.iocb_cmd.u.ctarg.req, sp->u.iocb_cmd.u.ctarg.req_dma); sp->u.iocb_cmd.u.ctarg.req = NULL; + /* ref: INIT */ qla2x00_rel_sp(sp); return rval; } @@ -4057,9 +4061,8 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp) sp->name = "gpnft"; sp->gen1 = 
vha->hw->base_qpair->chip_reset; sp->gen2 = fc4_type; - - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_gpnft_gnnft_sp_done); rspsz = sp->u.iocb_cmd.u.ctarg.rsp_size; memset(sp->u.iocb_cmd.u.ctarg.rsp, 0, sp->u.iocb_cmd.u.ctarg.rsp_size); @@ -4074,8 +4077,6 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp) sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->done = qla2x00_async_gpnft_gnnft_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s hdl=%x FC4Type %x.\n", sp->name, sp->handle, ct_req->req.gpn_ft.port_type); @@ -4103,7 +4104,8 @@ int qla24xx_async_gpnft(scsi_qla_host_t *vha, u8 fc4_type, srb_t *sp) sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); spin_lock_irqsave(&vha->work_lock, flags); vha->scan.scan_flags &= ~SF_SCANNING; @@ -4167,7 +4169,8 @@ static void qla2x00_async_gnnid_sp_done(srb_t *sp, int res) qla24xx_handle_gnnid_event(vha, &ea); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport) @@ -4180,6 +4183,7 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; qla2x00_set_fcport_disc_state(fcport, DSC_GNN_ID); + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC); if (!sp) goto done; @@ -4189,9 +4193,8 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport) sp->name = "gnnid"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_gnnid_sp_done); /* CT_IU preamble */ ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GNN_ID_CMD, @@ -4210,8 +4213,6 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport) sp->u.iocb_cmd.u.ctarg.rsp_size = GNN_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->done = qla2x00_async_gnnid_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - %8phC hdl=%x loopid=%x portid %06x.\n", sp->name, fcport->port_name, @@ -4223,7 +4224,8 @@ int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; done: return rval; @@ -4297,7 +4299,8 @@ static void qla2x00_async_gfpnid_sp_done(srb_t *sp, int res) qla24xx_handle_gfpnid_event(vha, &ea); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport) @@ -4309,6 +4312,7 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport) if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) return rval; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC); if (!sp) goto done; @@ -4317,9 +4321,8 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport) sp->name = "gfpnid"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_gfpnid_sp_done); /* CT_IU preamble */ ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GFPN_ID_CMD, @@ -4338,8 +4341,6 
@@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport) sp->u.iocb_cmd.u.ctarg.rsp_size = GFPN_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - sp->done = qla2x00_async_gfpnid_sp_done; - ql_dbg(ql_dbg_disc, vha, 0xffff, "Async-%s - %8phC hdl=%x loopid=%x portid %06x.\n", sp->name, fcport->port_name, @@ -4352,7 +4353,8 @@ int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 1fe4966fc2f68..7f81525c4fb32 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -51,6 +51,9 @@ qla2x00_sp_timeout(struct timer_list *t) WARN_ON(irqs_disabled()); iocb = &sp->u.iocb_cmd; iocb->timeout(sp); + + /* ref: TMR */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } void qla2x00_sp_free(srb_t *sp) @@ -125,8 +128,13 @@ static void qla24xx_abort_iocb_timeout(void *data) } spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); - if (sp->cmd_sp) + if (sp->cmd_sp) { + /* + * This done function should take care of + * original command ref: INIT + */ sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED); + } abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT); sp->done(sp, QLA_OS_TIMER_EXPIRED); @@ -140,11 +148,11 @@ static void qla24xx_abort_sp_done(srb_t *sp, int res) if (orig_sp) qla_wait_nvme_release_cmd_kref(orig_sp); - del_timer(&sp->u.iocb_cmd.timer); if (sp->flags & SRB_WAKEUP_ON_COMP) complete(&abt->u.abt.comp); else - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) @@ -154,6 +162,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) srb_t *sp; int rval = QLA_FUNCTION_FAILED; + /* ref: INIT for ABTS command */ sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport, GFP_ATOMIC); if (!sp) @@ -167,23 +176,22 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) if (wait) sp->flags = SRB_WAKEUP_ON_COMP; - abt_iocb->timeout = qla24xx_abort_iocb_timeout; init_completion(&abt_iocb->u.abt.comp); /* FW can send 2 x ABTS's timeout/20s */ - qla2x00_init_timer(sp, 42); + qla2x00_init_async_sp(sp, 42, qla24xx_abort_sp_done); + sp->u.iocb_cmd.timeout = qla24xx_abort_iocb_timeout; abt_iocb->u.abt.cmd_hndl = cmd_sp->handle; abt_iocb->u.abt.req_que_no = cpu_to_le16(cmd_sp->qpair->req->id); - sp->done = qla24xx_abort_sp_done; - ql_dbg(ql_dbg_async, vha, 0x507c, "Abort command issued - hdl=%x, type=%x\n", cmd_sp->handle, cmd_sp->type); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return rval; } @@ -191,7 +199,8 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) wait_for_completion(&abt_iocb->u.abt.comp); rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ? 
QLA_SUCCESS : QLA_ERR_FROM_FW; - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } return rval; @@ -286,10 +295,13 @@ static void qla2x00_async_login_sp_done(srb_t *sp, int res) ea.iop[0] = lio->u.logio.iop[0]; ea.iop[1] = lio->u.logio.iop[1]; ea.sp = sp; + if (res) + ea.data[0] = MBS_COMMAND_ERROR; qla24xx_handle_plogi_done_event(vha, &ea); } - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int @@ -308,6 +320,7 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; } + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -320,12 +333,10 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, sp->name = "login"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_login_sp_done); lio = &sp->u.iocb_cmd; - lio->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); - - sp->done = qla2x00_async_login_sp_done; if (N2N_TOPO(fcport->vha->hw) && fcport_is_bigger(fcport)) { lio->u.logio.flags |= SRB_LOGIN_PRLI_ONLY; } else { @@ -358,7 +369,8 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; done: fcport->flags &= ~FCF_ASYNC_ACTIVE; @@ -370,29 +382,26 @@ static void qla2x00_async_logout_sp_done(srb_t *sp, int res) sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); sp->fcport->login_gen++; qlt_logo_completion_handler(sp->fcport, sp->u.iocb_cmd.u.logio.data[0]); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) { srb_t *sp; - struct srb_iocb *lio; int rval = QLA_FUNCTION_FAILED; fcport->flags |= FCF_ASYNC_SENT; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_LOGOUT_CMD; sp->name = "logout"; - - lio = &sp->u.iocb_cmd; - lio->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); - - sp->done = qla2x00_async_logout_sp_done; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_logout_sp_done); ql_dbg(ql_dbg_disc, vha, 0x2070, "Async-logout - hdl=%x loop-id=%x portid=%02x%02x%02x %8phC explicit %d.\n", @@ -406,7 +415,8 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); return rval; @@ -432,29 +442,26 @@ static void qla2x00_async_prlo_sp_done(srb_t *sp, int res) if (!test_bit(UNLOADING, &vha->dpc_flags)) qla2x00_post_async_prlo_done_work(sp->fcport->vha, sp->fcport, lio->u.logio.data); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport) { srb_t *sp; - struct srb_iocb *lio; int rval; rval = QLA_FUNCTION_FAILED; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_PRLO_CMD; sp->name = "prlo"; - - lio = &sp->u.iocb_cmd; - lio->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); - - sp->done = qla2x00_async_prlo_sp_done; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2,
+ qla2x00_async_prlo_sp_done); ql_dbg(ql_dbg_disc, vha, 0x2070, "Async-prlo - hdl=%x loop-id=%x portid=%02x%02x%02x.\n", @@ -468,7 +475,8 @@ qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: fcport->flags &= ~FCF_ASYNC_ACTIVE; return rval; @@ -551,10 +559,12 @@ static void qla2x00_async_adisc_sp_done(srb_t *sp, int res) ea.iop[1] = lio->u.logio.iop[1]; ea.fcport = sp->fcport; ea.sp = sp; + if (res) + ea.data[0] = MBS_COMMAND_ERROR; qla24xx_handle_adisc_event(vha, &ea); - - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int @@ -565,26 +575,34 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, struct srb_iocb *lio; int rval = QLA_FUNCTION_FAILED; + if (IS_SESSION_DELETED(fcport)) { + ql_log(ql_log_warn, vha, 0xffff, + "%s: %8phC is being delete - not sending command.\n", + __func__, fcport->port_name); + fcport->flags &= ~FCF_ASYNC_ACTIVE; + return rval; + } + if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) return rval; fcport->flags |= FCF_ASYNC_SENT; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_ADISC_CMD; sp->name = "adisc"; - - lio = &sp->u.iocb_cmd; - lio->timeout = qla2x00_async_iocb_timeout; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_adisc_sp_done); - sp->done = qla2x00_async_adisc_sp_done; - if (data[1] & QLA_LOGIO_LOGIN_RETRIED) + if (data[1] & QLA_LOGIO_LOGIN_RETRIED) { + lio = &sp->u.iocb_cmd; lio->u.logio.flags |= SRB_LOGIN_RETRIED; + } ql_dbg(ql_dbg_disc, vha, 0x206f, "Async-adisc - hdl=%x loopid=%x portid=%06x %8phC.\n", @@ -597,7 +615,8 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); qla2x00_post_async_adisc_work(vha, fcport, data); @@ -963,6 +982,9 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, set_bit(RELOGIN_NEEDED, &vha->dpc_flags); } break; + case ISP_CFG_NL: + qla24xx_fcport_handle_login(vha, fcport); + break; default: break; } @@ -1078,13 +1100,13 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) } spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) { srb_t *sp; - struct srb_iocb *mbx; int rval = QLA_FUNCTION_FAILED; unsigned long flags; u16 *mb; @@ -1109,6 +1131,7 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) vha->gnl.sent = 1; spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -1117,10 +1140,8 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) sp->name = "gnlist"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - mbx = &sp->u.iocb_cmd; - mbx->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla24xx_async_gnl_sp_done); mb = sp->u.iocb_cmd.u.mbx.out_mb; mb[0] = MBC_PORT_NODE_NAME_LIST; @@ -1132,8 +1153,6 @@ int qla24xx_async_gnl(struct 
scsi_qla_host *vha, fc_port_t *fcport) mb[8] = vha->gnl.size; mb[9] = vha->vp_idx; - sp->done = qla24xx_async_gnl_sp_done; - ql_dbg(ql_dbg_disc, vha, 0x20da, "Async-%s - OUT WWPN %8phC hndl %x\n", sp->name, fcport->port_name, sp->handle); @@ -1145,7 +1164,8 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: fcport->flags &= ~(FCF_ASYNC_ACTIVE | FCF_ASYNC_SENT); return rval; @@ -1191,7 +1211,7 @@ static void qla24xx_async_gpdb_sp_done(srb_t *sp, int res) dma_pool_free(ha->s_dma_pool, sp->u.iocb_cmd.u.mbx.in, sp->u.iocb_cmd.u.mbx.in_dma); - sp->free(sp); + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport) @@ -1232,11 +1252,13 @@ static void qla2x00_async_prli_sp_done(srb_t *sp, int res) ea.sp = sp; if (res == QLA_OS_TIMER_EXPIRED) ea.data[0] = QLA_OS_TIMER_EXPIRED; + else if (res) + ea.data[0] = MBS_COMMAND_ERROR; qla24xx_handle_prli_done_event(vha, &ea); } - sp->free(sp); + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int @@ -1269,12 +1291,10 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) sp->type = SRB_PRLI_CMD; sp->name = "prli"; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_prli_sp_done); lio = &sp->u.iocb_cmd; - lio->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); - - sp->done = qla2x00_async_prli_sp_done; lio->u.logio.flags = 0; if (NVME_TARGET(vha->hw, fcport)) @@ -1296,7 +1316,8 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; return rval; } @@ -1325,14 +1346,21 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) struct port_database_24xx *pd; struct qla_hw_data *ha = vha->hw; - if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT) || - fcport->loop_id == FC_NO_LOOP_ID) { + if (IS_SESSION_DELETED(fcport)) { ql_log(ql_log_warn, vha, 0xffff, - "%s: %8phC - not sending command.\n", - __func__, fcport->port_name); + "%s: %8phC is being delete - not sending command.\n", + __func__, fcport->port_name); + fcport->flags &= ~FCF_ASYNC_ACTIVE; return rval; } + if (!vha->flags.online || fcport->flags & FCF_ASYNC_SENT) { + ql_log(ql_log_warn, vha, 0xffff, + "%s: %8phC online %d flags %x - not sending command.\n", + __func__, fcport->port_name, vha->flags.online, fcport->flags); + goto done; + } + sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -1344,10 +1372,8 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) sp->name = "gpdb"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; - - mbx = &sp->u.iocb_cmd; - mbx->timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla24xx_async_gpdb_sp_done); pd = dma_pool_zalloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma); if (pd == NULL) { @@ -1366,11 +1392,10 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) mb[9] = vha->vp_idx; mb[10] = opt; - mbx->u.mbx.in = pd; + mbx = &sp->u.iocb_cmd; + mbx->u.mbx.in = (void *)pd; mbx->u.mbx.in_dma = pd_dma; - sp->done = qla24xx_async_gpdb_sp_done; - ql_dbg(ql_dbg_disc, vha, 0x20dc, "Async-%s %8phC hndl %x opt %x\n", sp->name, 
fcport->port_name, sp->handle, opt); @@ -1384,7 +1409,7 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) if (pd) dma_pool_free(ha->s_dma_pool, pd, pd_dma); - sp->free(sp); + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; done: fcport->flags &= ~FCF_ASYNC_ACTIVE; @@ -1556,6 +1581,11 @@ static void qla_chk_n2n_b4_login(struct scsi_qla_host *vha, fc_port_t *fcport) u8 login = 0; int rc; + ql_dbg(ql_dbg_disc, vha, 0x307b, + "%s %8phC DS %d LS %d lid %d retries=%d\n", + __func__, fcport->port_name, fcport->disc_state, + fcport->fw_login_state, fcport->loop_id, fcport->login_retry); + if (qla_tgt_mode_enabled(vha)) return; @@ -1614,7 +1644,8 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) fcport->login_gen, fcport->loop_id, fcport->scan_state, fcport->fc4_type); - if (fcport->scan_state != QLA_FCPORT_FOUND) + if (fcport->scan_state != QLA_FCPORT_FOUND || + fcport->disc_state == DSC_DELETE_PEND) return 0; if ((fcport->loop_id != FC_NO_LOOP_ID) && @@ -1635,7 +1666,7 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) if (vha->host->active_mode == MODE_TARGET && !N2N_TOPO(vha->hw)) return 0; - if (fcport->flags & FCF_ASYNC_SENT) { + if (fcport->flags & (FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE)) { set_bit(RELOGIN_NEEDED, &vha->dpc_flags); return 0; } @@ -1970,22 +2001,21 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, srb_t *sp; int rval = QLA_FUNCTION_FAILED; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; - tm_iocb = &sp->u.iocb_cmd; sp->type = SRB_TM_CMD; sp->name = "tmf"; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha), + qla2x00_tmf_sp_done); + sp->u.iocb_cmd.timeout = qla2x00_tmf_iocb_timeout; - tm_iocb->timeout = qla2x00_tmf_iocb_timeout; + tm_iocb = &sp->u.iocb_cmd; init_completion(&tm_iocb->u.tmf.comp); - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)); - tm_iocb->u.tmf.flags = flags; tm_iocb->u.tmf.lun = lun; - tm_iocb->u.tmf.data = tag; - sp->done = qla2x00_tmf_sp_done; ql_dbg(ql_dbg_taskm, vha, 0x802f, "Async-tmf hdl=%x loop-id=%x portid=%02x%02x%02x.\n", @@ -2015,7 +2045,8 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, } done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); fcport->flags &= ~FCF_ASYNC_SENT; done: return rval; @@ -2074,13 +2105,6 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea) qla24xx_post_gpdb_work(vha, ea->fcport, 0); break; default: - if ((ea->iop[0] == LSC_SCODE_ELS_REJECT) && - (ea->iop[1] == 0x50000)) { /* reson 5=busy expl:0x0 */ - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - ea->fcport->fw_login_state = DSC_LS_PLOGI_COMP; - break; - } - sp = ea->sp; ql_dbg(ql_dbg_disc, vha, 0x2118, "%s %d %8phC priority %s, fc4type %x prev try %s\n", @@ -2224,12 +2248,7 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea) ql_dbg(ql_dbg_disc, vha, 0x20eb, "%s %d %8phC cmd error %x\n", __func__, __LINE__, ea->fcport->port_name, ea->data[1]); - ea->fcport->flags &= ~FCF_ASYNC_SENT; - qla2x00_set_fcport_disc_state(ea->fcport, DSC_LOGIN_FAILED); - if (ea->data[1] & QLA_LOGIO_LOGIN_RETRIED) - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - else - qla2x00_mark_device_lost(vha, ea->fcport, 1); + qlt_schedule_sess_for_deletion(ea->fcport); break; case MBS_LOOP_ID_USED: /* data[1] = IO PARAM 1 = nport ID */ @@ -3472,6 +3491,14 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t 
*vha) struct rsp_que *rsp = ha->rsp_q_map[0]; struct qla2xxx_fw_dump *fw_dump; + if (ha->fw_dump) { + ql_dbg(ql_dbg_init, vha, 0x00bd, + "Firmware dump already allocated.\n"); + return; + } + + ha->fw_dumped = 0; + ha->fw_dump_cap_flags = 0; dump_size = fixed_size = mem_size = eft_size = fce_size = mq_size = 0; req_q_size = rsp_q_size = 0; @@ -3482,7 +3509,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mem_size = (ha->fw_memory_size - 0x11000 + 1) * sizeof(uint16_t); } else if (IS_FWI2_CAPABLE(ha)) { - if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) + if (IS_QLA83XX(ha)) fixed_size = offsetof(struct qla83xx_fw_dump, ext_mem); else if (IS_QLA81XX(ha)) fixed_size = offsetof(struct qla81xx_fw_dump, ext_mem); @@ -3494,8 +3521,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mem_size = (ha->fw_memory_size - 0x100000 + 1) * sizeof(uint32_t); if (ha->mqenable) { - if (!IS_QLA83XX(ha) && !IS_QLA27XX(ha) && - !IS_QLA28XX(ha)) + if (!IS_QLA83XX(ha)) mq_size = sizeof(struct qla2xxx_mq_chain); /* * Allocate maximum buffer size for all queues - Q0. @@ -4056,8 +4082,7 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) ha->fw_major_version, ha->fw_minor_version, ha->fw_subminor_version); - if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || - IS_QLA28XX(ha)) { + if (IS_QLA83XX(ha)) { ha->flags.fac_supported = 0; rval = QLA_SUCCESS; } @@ -5602,6 +5627,13 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) memcpy(fcport->node_name, new_fcport->node_name, WWN_SIZE); fcport->scan_state = QLA_FCPORT_FOUND; + if (fcport->login_retry == 0) { + fcport->login_retry = vha->hw->login_retry_count; + ql_dbg(ql_dbg_disc, vha, 0x2135, + "Port login retry %8phN, lid 0x%04x retry cnt=%d.\n", + fcport->port_name, fcport->loop_id, + fcport->login_retry); + } found++; break; } @@ -5735,6 +5767,8 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport) if (atomic_read(&fcport->state) == FCS_ONLINE) return; + qla2x00_set_fcport_state(fcport, FCS_ONLINE); + rport_ids.node_name = wwn_to_u64(fcport->node_name); rport_ids.port_name = wwn_to_u64(fcport->port_name); rport_ids.port_id = fcport->d_id.b.domain << 16 | @@ -5835,6 +5869,7 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) qla2x00_reg_remote_port(vha, fcport); break; case MODE_TARGET: + qla2x00_set_fcport_state(fcport, FCS_ONLINE); if (!vha->vha_tgt.qla_tgt->tgt_stop && !vha->vha_tgt.qla_tgt->tgt_stopped) qlt_fc_port_added(vha, fcport); @@ -5852,8 +5887,6 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) if (NVME_TARGET(vha->hw, fcport)) qla_nvme_register_remote(vha, fcport); - qla2x00_set_fcport_state(fcport, FCS_ONLINE); - if (IS_IIDMA_CAPABLE(vha->hw) && vha->hw->flags.gpsc_supported) { if (fcport->id_changed) { fcport->id_changed = 0; @@ -9390,7 +9423,7 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos, qpair->rsp->req = qpair->req; qpair->rsp->qpair = qpair; /* init qpair to this cpu. Will adjust at run time. 
*/ - qla_cpu_update(qpair, smp_processor_id()); + qla_cpu_update(qpair, raw_smp_processor_id()); if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) { if (ha->fw_attributes & BIT_4) diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 5f3b7995cc8f3..db17f7f410cdd 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -184,6 +184,8 @@ static void qla2xxx_init_sp(srb_t *sp, scsi_qla_host_t *vha, sp->vha = vha; sp->qpair = qpair; sp->cmd_type = TYPE_SRB; + /* ref : INIT - normal flow */ + kref_init(&sp->cmd_kref); INIT_LIST_HEAD(&sp->elem); } diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index ed604f2185bf2..e0fe9ddb4bd2c 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2560,11 +2560,38 @@ qla24xx_tm_iocb(srb_t *sp, struct tsk_mgmt_entry *tsk) } } -void qla2x00_init_timer(srb_t *sp, unsigned long tmo) +static void +qla2x00_async_done(struct srb *sp, int res) +{ + if (del_timer(&sp->u.iocb_cmd.timer)) { + /* + * Successfully cancelled the timeout handler + * ref: TMR + */ + if (kref_put(&sp->cmd_kref, qla2x00_sp_release)) + return; + } + sp->async_done(sp, res); +} + +void +qla2x00_sp_release(struct kref *kref) +{ + struct srb *sp = container_of(kref, struct srb, cmd_kref); + + sp->free(sp); +} + +void +qla2x00_init_async_sp(srb_t *sp, unsigned long tmo, + void (*done)(struct srb *sp, int res)) { timer_setup(&sp->u.iocb_cmd.timer, qla2x00_sp_timeout, 0); - sp->u.iocb_cmd.timer.expires = jiffies + tmo * HZ; + sp->done = qla2x00_async_done; + sp->async_done = done; sp->free = qla2x00_sp_free; + sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; + sp->u.iocb_cmd.timer.expires = jiffies + tmo * HZ; if (IS_QLAFX00(sp->vha->hw) && sp->type == SRB_FXIOCB_DCMD) init_completion(&sp->u.iocb_cmd.u.fxiocb.fxiocb_comp); sp->start_timer = 1; @@ -2651,7 +2678,9 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, return -ENOMEM; } - /* Alloc SRB structure */ + /* Alloc SRB structure + * ref: INIT + */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) { kfree(fcport); @@ -2672,18 +2701,19 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, sp->type = SRB_ELS_DCMD; sp->name = "ELS_DCMD"; sp->fcport = fcport; - elsio->timeout = qla2x00_els_dcmd_iocb_timeout; - qla2x00_init_timer(sp, ELS_DCMD_TIMEOUT); - init_completion(&sp->u.iocb_cmd.u.els_logo.comp); - sp->done = qla2x00_els_dcmd_sp_done; + qla2x00_init_async_sp(sp, ELS_DCMD_TIMEOUT, + qla2x00_els_dcmd_sp_done); sp->free = qla2x00_els_dcmd_sp_free; + sp->u.iocb_cmd.timeout = qla2x00_els_dcmd_iocb_timeout; + init_completion(&sp->u.iocb_cmd.u.els_logo.comp); elsio->u.els_logo.els_logo_pyld = dma_alloc_coherent(&ha->pdev->dev, DMA_POOL_SIZE, &elsio->u.els_logo.els_logo_pyld_dma, GFP_KERNEL); if (!elsio->u.els_logo.els_logo_pyld) { - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return QLA_FUNCTION_FAILED; } @@ -2706,7 +2736,8 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return QLA_FUNCTION_FAILED; } @@ -2717,7 +2748,8 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, wait_for_completion(&elsio->u.els_logo.comp); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return rval; } @@ -2850,7 +2882,6 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) sp->name, res, 
sp->handle, fcport->d_id.b24, fcport->port_name); fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE); - del_timer(&sp->u.iocb_cmd.timer); if (sp->flags & SRB_WAKEUP_ON_COMP) complete(&lio->u.els_plogi.comp); @@ -2927,6 +2958,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); + break; } fallthrough; default: @@ -2936,9 +2968,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) fw_status[0], fw_status[1], fw_status[2]); fcport->flags &= ~FCF_ASYNC_SENT; - qla2x00_set_fcport_disc_state(fcport, - DSC_LOGIN_FAILED); - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + qlt_schedule_sess_for_deletion(fcport); break; } break; @@ -2950,8 +2980,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) fw_status[0], fw_status[1], fw_status[2]); sp->fcport->flags &= ~FCF_ASYNC_SENT; - qla2x00_set_fcport_disc_state(fcport, DSC_LOGIN_FAILED); - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + qlt_schedule_sess_for_deletion(fcport); break; } @@ -2960,7 +2989,8 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) struct srb_iocb *elsio = &sp->u.iocb_cmd; qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return; } e->u.iosb.sp = sp; @@ -2978,7 +3008,9 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, int rval = QLA_SUCCESS; void *ptr, *resp_ptr; - /* Alloc SRB structure */ + /* Alloc SRB structure + * ref: INIT + */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) { ql_log(ql_log_info, vha, 0x70e6, @@ -2993,17 +3025,16 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, ql_dbg(ql_dbg_io, vha, 0x3073, "%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24); - sp->type = SRB_ELS_DCMD; - sp->name = "ELS_DCMD"; - sp->fcport = fcport; - - elsio->timeout = qla2x00_els_dcmd2_iocb_timeout; if (wait) sp->flags = SRB_WAKEUP_ON_COMP; - qla2x00_init_timer(sp, ELS_DCMD_TIMEOUT + 2); + sp->type = SRB_ELS_DCMD; + sp->name = "ELS_DCMD"; + sp->fcport = fcport; + qla2x00_init_async_sp(sp, ELS_DCMD_TIMEOUT + 2, + qla2x00_els_dcmd2_sp_done); + sp->u.iocb_cmd.timeout = qla2x00_els_dcmd2_iocb_timeout; - sp->done = qla2x00_els_dcmd2_sp_done; elsio->u.els_plogi.tx_size = elsio->u.els_plogi.rx_size = DMA_POOL_SIZE; ptr = elsio->u.els_plogi.els_plogi_pyld = @@ -3068,7 +3099,8 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode, out: fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi); - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } @@ -3879,8 +3911,15 @@ qla2x00_start_sp(srb_t *sp) break; } - if (sp->start_timer) + if (sp->start_timer) { + /* ref: TMR timer ref * this code should be just before the start_iocbs() call * so that the caller does not have to do + * kref_put even on failure + */ + kref_get(&sp->cmd_kref); add_timer(&sp->u.iocb_cmd.timer); + } wmb(); qla2x00_start_iocbs(vha, qp->req); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index aaf6504570fdd..198b782d77901 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2498,6 +2498,7 @@ qla24xx_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk) iocb->u.tmf.data = QLA_FUNCTION_FAILED; } else if ((le16_to_cpu(sts->scsi_status) & SS_RESPONSE_INFO_LEN_VALID)) { + host_to_fcp_swap(sts->data, sizeof(sts->data)); if (le32_to_cpu(sts->rsp_data_len) < 4) {
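/*
 * Hedged reading of the change above: the TM status IOCB carries the FCP
 * response info in sts->data in wire (big-endian) order, so the added
 * host_to_fcp_swap() restores CPU byte order before the response code
 * inside sts->data is parsed below.
 */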
ql_log(ql_log_warn, fcport->vha, 0x503b, "Async-%s error - hdl=%x not enough response(%d).\n", diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 10d2655ef6767..7f236db058869 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -9,6 +9,12 @@ #include #include +#ifdef CONFIG_PPC +#define IS_PPCARCH true +#else +#define IS_PPCARCH false +#endif + static struct mb_cmd_name { uint16_t cmd; const char *str; @@ -728,6 +734,9 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr) vha->min_supported_speed = nv->min_supported_speed; } + + if (IS_PPCARCH) + mcp->mb[11] |= BIT_4; } if (ha->flags.exlogins_enabled) @@ -3029,8 +3038,7 @@ qla2x00_get_resource_cnts(scsi_qla_host_t *vha) ha->orig_fw_iocb_count = mcp->mb[10]; if (ha->flags.npiv_supported) ha->max_npiv_vports = mcp->mb[11]; - if (IS_QLA81XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha) || - IS_QLA28XX(ha)) + if (IS_QLA81XX(ha) || IS_QLA83XX(ha)) ha->fw_max_fcf_count = mcp->mb[12]; } @@ -5621,7 +5629,7 @@ qla2x00_get_data_rate(scsi_qla_host_t *vha) mcp->out_mb = MBX_1|MBX_0; mcp->in_mb = MBX_2|MBX_1|MBX_0; if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) - mcp->in_mb |= MBX_3; + mcp->in_mb |= MBX_4|MBX_3; mcp->tov = MBX_TOV_SECONDS; mcp->flags = 0; rval = qla2x00_mailbox_command(vha, mcp); @@ -6479,23 +6487,21 @@ int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp) if (!vha->hw->flags.fw_started) goto done; + /* ref: INIT */ sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); if (!sp) goto done; - sp->type = SRB_MB_IOCB; - sp->name = mb_to_str(mcp->mb[0]); - c = &sp->u.iocb_cmd; - c->timeout = qla2x00_async_iocb_timeout; init_completion(&c->u.mbx.comp); - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + sp->type = SRB_MB_IOCB; + sp->name = mb_to_str(mcp->mb[0]); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_mb_sp_done); memcpy(sp->u.iocb_cmd.u.mbx.out_mb, mcp->mb, SIZEOF_IOCB_MB_REG); - sp->done = qla2x00_async_mb_sp_done; - rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { ql_dbg(ql_dbg_mbx, vha, 0x1018, @@ -6527,7 +6533,8 @@ int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp) } done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 1c024055f8c50..e6b5c4ccce97b 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -965,6 +965,7 @@ int qla24xx_control_vp(scsi_qla_host_t *vha, int cmd) if (vp_index == 0 || vp_index >= ha->max_npiv_vports) return QLA_PARAMETER_ERROR; + /* ref: INIT */ sp = qla2x00_get_sp(base_vha, NULL, GFP_KERNEL); if (!sp) return rval; @@ -972,9 +973,8 @@ int qla24xx_control_vp(scsi_qla_host_t *vha, int cmd) sp->type = SRB_CTRL_VP; sp->name = "ctrl_vp"; sp->comp = ∁ - sp->done = qla_ctrlvp_sp_done; - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla_ctrlvp_sp_done); sp->u.iocb_cmd.u.ctrlvp.cmd = cmd; sp->u.iocb_cmd.u.ctrlvp.vp_index = vp_index; @@ -1008,6 +1008,7 @@ int qla24xx_control_vp(scsi_qla_host_t *vha, int cmd) break; } done: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); return rval; } diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index 350b0c4346fb6..f726eb8449c5e 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c 
+++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -1787,17 +1787,18 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type) struct register_host_info *preg_hsi; struct new_utsname *p_sysid = NULL; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; sp->type = SRB_FXIOCB_DCMD; sp->name = "fxdisc"; + qla2x00_init_async_sp(sp, FXDISC_TIMEOUT, + qla2x00_fxdisc_sp_done); + sp->u.iocb_cmd.timeout = qla2x00_fxdisc_iocb_timeout; fdisc = &sp->u.iocb_cmd; - fdisc->timeout = qla2x00_fxdisc_iocb_timeout; - qla2x00_init_timer(sp, FXDISC_TIMEOUT); - switch (fx_type) { case FXDISC_GET_CONFIG_INFO: fdisc->u.fxiocb.flags = @@ -1898,7 +1899,6 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type) } fdisc->u.fxiocb.req_func_type = cpu_to_le16(fx_type); - sp->done = qla2x00_fxdisc_sp_done; rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) @@ -1974,7 +1974,8 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type) dma_free_coherent(&ha->pdev->dev, fdisc->u.fxiocb.req_len, fdisc->u.fxiocb.req_addr, fdisc->u.fxiocb.req_dma_handle); done_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: return rval; } diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index e22ec7cb65db5..4cfc2efdf7766 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -37,6 +37,11 @@ int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport) (fcport->nvme_flag & NVME_FLAG_REGISTERED)) return 0; + if (atomic_read(&fcport->state) == FCS_ONLINE) + return 0; + + qla2x00_set_fcport_state(fcport, FCS_ONLINE); + fcport->nvme_flag &= ~NVME_FLAG_RESETTING; memset(&req, 0, sizeof(struct nvme_fc_port_info)); @@ -170,6 +175,18 @@ static void qla_nvme_release_fcp_cmd_kref(struct kref *kref) qla2xxx_rel_qpair_sp(sp->qpair, sp); } +static void qla_nvme_ls_unmap(struct srb *sp, struct nvmefc_ls_req *fd) +{ + if (sp->flags & SRB_DMA_VALID) { + struct srb_iocb *nvme = &sp->u.iocb_cmd; + struct qla_hw_data *ha = sp->fcport->vha->hw; + + dma_unmap_single(&ha->pdev->dev, nvme->u.nvme.cmd_dma, + fd->rqstlen, DMA_TO_DEVICE); + sp->flags &= ~SRB_DMA_VALID; + } +} + static void qla_nvme_release_ls_cmd_kref(struct kref *kref) { struct srb *sp = container_of(kref, struct srb, cmd_kref); @@ -186,6 +203,8 @@ static void qla_nvme_release_ls_cmd_kref(struct kref *kref) spin_unlock_irqrestore(&priv->cmd_lock, flags); fd = priv->fd; + + qla_nvme_ls_unmap(sp, fd); fd->done(fd, priv->comp_status); out: qla2x00_rel_sp(sp); @@ -356,6 +375,8 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma, fd->rqstlen, DMA_TO_DEVICE); + sp->flags |= SRB_DMA_VALID; + rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x700e, @@ -363,6 +384,7 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, wake_up(&sp->nvme_ls_waitq); sp->priv = NULL; priv->sp = NULL; + qla_nvme_ls_unmap(sp, fd); qla2x00_rel_sp(sp); return rval; } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index abcd309172638..6dc2189badd33 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -728,7 +728,8 @@ void qla2x00_sp_compl(srb_t *sp, int res) struct scsi_cmnd *cmd = GET_CMD_SP(sp); struct completion *comp = sp->comp; - sp->free(sp); + /* kref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); cmd->result = res; CMD_SP(cmd) = NULL; 
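/*
 * Hedged note: sp here is the scsi_cmd_priv(cmd) area (see the
 * qla2xxx_queuecommand() hunk below), so the INIT reference is dropped
 * above, before scsi_done() returns cmd, and the srb storage embedded
 * in it, to the midlayer for reuse.
 */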
scsi_done(cmd); @@ -819,7 +820,8 @@ void qla2xxx_qpair_sp_compl(srb_t *sp, int res) struct scsi_cmnd *cmd = GET_CMD_SP(sp); struct completion *comp = sp->comp; - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); cmd->result = res; CMD_SP(cmd) = NULL; scsi_done(cmd); @@ -919,6 +921,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) goto qc24_target_busy; sp = scsi_cmd_priv(cmd); + /* ref: INIT */ qla2xxx_init_sp(sp, vha, vha->hw->base_qpair, fcport); sp->u.scmd.cmd = cmd; @@ -938,7 +941,8 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) return 0; qc24_host_busy_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); qc24_target_busy: return SCSI_MLQUEUE_TARGET_BUSY; @@ -1008,6 +1012,7 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd, goto qc24_target_busy; sp = scsi_cmd_priv(cmd); + /* ref: INIT */ qla2xxx_init_sp(sp, vha, qpair, fcport); sp->u.scmd.cmd = cmd; @@ -1026,7 +1031,8 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd, return 0; qc24_host_busy_free_sp: - sp->free(sp); + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); qc24_target_busy: return SCSI_MLQUEUE_TARGET_BUSY; @@ -3748,8 +3754,7 @@ qla2x00_unmap_iobases(struct qla_hw_data *ha) if (ha->mqiobase) iounmap(ha->mqiobase); - if ((IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) && - ha->msixbase) + if (ha->msixbase) iounmap(ha->msixbase); } } @@ -3891,6 +3896,8 @@ qla24xx_free_purex_list(struct purex_list *list) spin_lock_irqsave(&list->lock, flags); list_for_each_entry_safe(item, next, &list->head, list) { list_del(&item->list); + if (item == &item->vha->default_item) + continue; kfree(item); } spin_unlock_irqrestore(&list->lock, flags); @@ -5526,6 +5533,11 @@ void qla2x00_relogin(struct scsi_qla_host *vha) memset(&ea, 0, sizeof(ea)); ea.fcport = fcport; qla24xx_handle_relogin_event(vha, &ea); + } else if (vha->hw->current_topology == + ISP_CFG_NL && + IS_QLA2XXX_MIDTYPE(vha->hw)) { + (void)qla24xx_fcport_handle_login(vha, + fcport); } else if (vha->hw->current_topology == ISP_CFG_NL) { fcport->login_retry--; @@ -7199,7 +7211,7 @@ static bool qla_do_heartbeat(struct scsi_qla_host *vha) return do_heartbeat; } -static void qla_heart_beat(struct scsi_qla_host *vha) +static void qla_heart_beat(struct scsi_qla_host *vha, u16 dpc_started) { struct qla_hw_data *ha = vha->hw; @@ -7209,8 +7221,19 @@ static void qla_heart_beat(struct scsi_qla_host *vha) if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha)) return; - if (qla_do_heartbeat(vha)) + /* + * dpc thread cannot run if heartbeat is running at the same time. + * We also do not want to starve heartbeat task. Therefore, do + * heartbeat task at least once every 5 seconds. 
+ */ + if (dpc_started && + time_before(jiffies, ha->last_heartbeat_run_jiffies + 5 * HZ)) + return; + + if (qla_do_heartbeat(vha)) { + ha->last_heartbeat_run_jiffies = jiffies; queue_work(ha->wq, &ha->heartbeat_work); + } } /************************************************************************** @@ -7401,6 +7424,8 @@ qla2x00_timer(struct timer_list *t) start_dpc++; } + /* borrowing w to signify dpc will run */ + w = 0; /* Schedule the DPC routine if needed */ if ((test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags) || @@ -7433,9 +7458,10 @@ qla2x00_timer(struct timer_list *t) test_bit(RELOGIN_NEEDED, &vha->dpc_flags), test_bit(PROCESS_PUREX_IOCB, &vha->dpc_flags)); qla2xxx_wake_dpc(vha); + w = 1; } - qla_heart_beat(vha); + qla_heart_beat(vha, w); qla2x00_restart_timer(vha, WATCH_INTERVAL); } @@ -7633,7 +7659,7 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) switch (state) { case pci_channel_io_normal: - ha->flags.eeh_busy = 0; + qla_pci_set_eeh_busy(vha); if (ql2xmqsupport || ql2xnvmeenable) { set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); @@ -7674,9 +7700,16 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) "mmio enabled\n"); ha->pci_error_state = QLA_PCI_MMIO_ENABLED; + if (IS_QLA82XX(ha)) return PCI_ERS_RESULT_RECOVERED; + if (qla2x00_isp_reg_stat(ha)) { + ql_log(ql_log_info, base_vha, 0x803f, + "During mmio enabled, PCI/Register disconnect still detected.\n"); + goto out; + } + spin_lock_irqsave(&ha->hardware_lock, flags); if (IS_QLA2100(ha) || IS_QLA2200(ha)){ stat = rd_reg_word(®->hccr); @@ -7698,6 +7731,7 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) "RISC paused -- mmio_enabled, Dumping firmware.\n"); qla2xxx_dump_fw(base_vha); } +out: /* set PCI_ERS_RESULT_NEED_RESET to trigger call to qla2xxx_pci_slot_reset */ ql_dbg(ql_dbg_aer, base_vha, 0x600d, "mmio enabled returning.\n"); diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index a0aeba69513d4..c092a6b1ced4f 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -844,7 +844,7 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr) ha->flt_region_nvram = start; break; case FLT_REG_IMG_PRI_27XX: - if (IS_QLA27XX(ha) && !IS_QLA28XX(ha)) + if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) ha->flt_region_img_status_pri = start; break; case FLT_REG_IMG_SEC_27XX: @@ -1356,7 +1356,7 @@ qla24xx_write_flash_data(scsi_qla_host_t *vha, __le32 *dwptr, uint32_t faddr, flash_data_addr(ha, faddr), le32_to_cpu(*dwptr)); if (ret) { ql_dbg(ql_dbg_user, vha, 0x7006, - "Failed slopw write %x (%x)\n", faddr, *dwptr); + "Failed slow write %x (%x)\n", faddr, *dwptr); break; } } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 8993d438e0b72..b109716d44fb7 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -620,7 +620,7 @@ static void qla2x00_async_nack_sp_done(srb_t *sp, int res) } spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - sp->free(sp); + kref_put(&sp->cmd_kref, qla2x00_sp_release); } int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, @@ -656,12 +656,10 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, sp->type = type; sp->name = "nack"; - - sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; - qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2); + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, + qla2x00_async_nack_sp_done); 
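/*
 * The conversion just above shows the call-site pattern this series applies
 * everywhere: the old three-step setup
 *
 *	sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
 *	qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2);
 *	sp->done = qla2x00_async_nack_sp_done;
 *
 * collapses into one qla2x00_init_async_sp() call, which (per the qla_iocb.c
 * hunk earlier) also sets sp->free and routes completion through
 * qla2x00_async_done() so the TMR reference is dropped before the caller's
 * handler runs. Sites that need a non-default timeout (abort, TMF, ELS,
 * fxdisc) still override sp->u.iocb_cmd.timeout right after the call.
 */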
sp->u.iocb_cmd.u.nack.ntfy = ntfy; - sp->done = qla2x00_async_nack_sp_done; ql_dbg(ql_dbg_disc, vha, 0x20f4, "Async-%s %8phC hndl %x %s\n", @@ -674,7 +672,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, return rval; done_free_sp: - sp->free(sp); + kref_put(&sp->cmd_kref, qla2x00_sp_release); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; @@ -3320,6 +3318,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, "RESET-RSP online/active/old-count/new-count = %d/%d/%d/%d.\n", vha->flags.online, qla2x00_reset_active(vha), cmd->reset_count, qpair->chip_reset); + res = 0; goto out_unmap_unlock; } @@ -7221,8 +7220,7 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) if (!QLA_TGT_MODE_ENABLED()) return; - if ((ql2xenablemsix == 0) || IS_QLA83XX(ha) || IS_QLA27XX(ha) || - IS_QLA28XX(ha)) { + if (ha->mqenable || IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { ISP_ATIO_Q_IN(base_vha) = &ha->mqiobase->isp25mq.atio_q_in; ISP_ATIO_Q_OUT(base_vha) = &ha->mqiobase->isp25mq.atio_q_out; } else { diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 26c13a953b975..b0a74b036cf4b 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -435,8 +435,13 @@ qla27xx_fwdt_entry_t266(struct scsi_qla_host *vha, { ql_dbg(ql_dbg_misc, vha, 0xd20a, "%s: reset risc [%lx]\n", __func__, *len); - if (buf) - WARN_ON_ONCE(qla24xx_soft_reset(vha->hw) != QLA_SUCCESS); + if (buf) { + if (qla24xx_soft_reset(vha->hw) != QLA_SUCCESS) { + ql_dbg(ql_dbg_async, vha, 0x5001, + "%s: unable to soft reset\n", __func__); + return INVALID_ENTRY; + } + } return qla27xx_next_entry(ent); } diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 60a6ae9d1219f..a75499616f5ef 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -484,8 +484,13 @@ static void scsi_report_sense(struct scsi_device *sdev, if (sshdr->asc == 0x29) { evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED; - sdev_printk(KERN_WARNING, sdev, - "Power-on or device reset occurred\n"); + /* + * Do not print message if it is an expected side-effect + * of runtime PM. 
+ */ + if (!sdev->silence_suspend) + sdev_printk(KERN_WARNING, sdev, + "Power-on or device reset occurred\n"); } if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) { diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 60e406bcf42a9..a2524106206db 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -34,7 +34,7 @@ static int fc_bsg_hostadd(struct Scsi_Host *, struct fc_host_attrs *); static int fc_bsg_rportadd(struct Scsi_Host *, struct fc_rport *); static void fc_bsg_remove(struct request_queue *); static void fc_bsg_goose_queue(struct fc_rport *); -static void fc_li_stats_update(struct fc_fn_li_desc *li_desc, +static void fc_li_stats_update(u16 event_type, struct fc_fpin_stats *stats); static void fc_delivery_stats_update(u32 reason_code, struct fc_fpin_stats *stats); @@ -670,42 +670,34 @@ fc_find_rport_by_wwpn(struct Scsi_Host *shost, u64 wwpn) EXPORT_SYMBOL(fc_find_rport_by_wwpn); static void -fc_li_stats_update(struct fc_fn_li_desc *li_desc, +fc_li_stats_update(u16 event_type, struct fc_fpin_stats *stats) { - stats->li += be32_to_cpu(li_desc->event_count); - switch (be16_to_cpu(li_desc->event_type)) { + stats->li++; + switch (event_type) { case FPIN_LI_UNKNOWN: - stats->li_failure_unknown += - be32_to_cpu(li_desc->event_count); + stats->li_failure_unknown++; break; case FPIN_LI_LINK_FAILURE: - stats->li_link_failure_count += - be32_to_cpu(li_desc->event_count); + stats->li_link_failure_count++; break; case FPIN_LI_LOSS_OF_SYNC: - stats->li_loss_of_sync_count += - be32_to_cpu(li_desc->event_count); + stats->li_loss_of_sync_count++; break; case FPIN_LI_LOSS_OF_SIG: - stats->li_loss_of_signals_count += - be32_to_cpu(li_desc->event_count); + stats->li_loss_of_signals_count++; break; case FPIN_LI_PRIM_SEQ_ERR: - stats->li_prim_seq_err_count += - be32_to_cpu(li_desc->event_count); + stats->li_prim_seq_err_count++; break; case FPIN_LI_INVALID_TX_WD: - stats->li_invalid_tx_word_count += - be32_to_cpu(li_desc->event_count); + stats->li_invalid_tx_word_count++; break; case FPIN_LI_INVALID_CRC: - stats->li_invalid_crc_count += - be32_to_cpu(li_desc->event_count); + stats->li_invalid_crc_count++; break; case FPIN_LI_DEVICE_SPEC: - stats->li_device_specific += - be32_to_cpu(li_desc->event_count); + stats->li_device_specific++; break; } } @@ -767,6 +759,7 @@ fc_fpin_li_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc *tlv) struct fc_rport *attach_rport = NULL; struct fc_host_attrs *fc_host = shost_to_fc_host(shost); struct fc_fn_li_desc *li_desc = (struct fc_fn_li_desc *)tlv; + u16 event_type = be16_to_cpu(li_desc->event_type); u64 wwpn; rport = fc_find_rport_by_wwpn(shost, @@ -775,7 +768,7 @@ fc_fpin_li_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc *tlv) (rport->roles & FC_PORT_ROLE_FCP_TARGET || rport->roles & FC_PORT_ROLE_NVME_TARGET)) { attach_rport = rport; - fc_li_stats_update(li_desc, &attach_rport->fpin_stats); + fc_li_stats_update(event_type, &attach_rport->fpin_stats); } if (be32_to_cpu(li_desc->pname_count) > 0) { @@ -789,14 +782,14 @@ fc_fpin_li_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc *tlv) rport->roles & FC_PORT_ROLE_NVME_TARGET)) { if (rport == attach_rport) continue; - fc_li_stats_update(li_desc, + fc_li_stats_update(event_type, &rport->fpin_stats); } } } if (fc_host->port_name == be64_to_cpu(li_desc->attached_wwpn)) - fc_li_stats_update(li_desc, &fc_host->fpin_stats); + fc_li_stats_update(event_type, &fc_host->fpin_stats); } /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 
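The scsi_transport_fc change above has fc_li_stats_update() take the already-decoded event_type and bump each counter by one per FPIN received, instead of re-reading the big-endian descriptor and accumulating the firmware-supplied event_count in every branch. The shape of that refactor, reduced to a hypothetical two-counter struct (demo_fpin_stats and count_li_event are illustrative; the real FPIN_LI_* values live in the FC headers):

        #include <linux/types.h>

        struct demo_fpin_stats {
                u64 li;
                u64 li_link_failure_count;
                u64 li_failure_unknown;
        };

        /* The caller decodes the wire-format field exactly once, e.g.
         * u16 event_type = be16_to_cpu(li_desc->event_type), and can then
         * update several stats blocks from the same descriptor. */
        static void count_li_event(u16 event_type, struct demo_fpin_stats *stats)
        {
                stats->li++;
                switch (event_type) {
                case 0x1:       /* stand-in for FPIN_LI_LINK_FAILURE */
                        stats->li_link_failure_count++;
                        break;
                default:
                        stats->li_failure_unknown++;
                        break;
                }
        }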
62eb9921cc947..66056806159a6 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3752,7 +3752,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) return 0; if (sdkp->WCE && sdkp->media_present) { - sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); + if (!sdkp->device->silence_suspend) + sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); ret = sd_sync_cache(sdkp, &sshdr); if (ret) { @@ -3774,7 +3775,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) } if (sdkp->device->manage_start_stop) { - sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); + if (!sdkp->device->silence_suspend) + sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); /* an error is not worth aborting a system sleep */ ret = sd_start_stop_device(sdkp, 0); if (ignore_stop_errors) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 9349557b8a01b..cb285d277201c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -585,7 +585,12 @@ static void ufshcd_print_pwr_info(struct ufs_hba *hba) "INVALID MODE", }; - dev_err(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n", + /* + * Using dev_dbg to avoid messages during runtime PM to avoid + * never-ending cycles of messages written back to storage by user space + * causing runtime resume, causing more messages and so on. + */ + dev_dbg(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n", __func__, hba->pwr_info.gear_rx, hba->pwr_info.gear_tx, hba->pwr_info.lane_rx, hba->pwr_info.lane_tx, @@ -5024,6 +5029,12 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) pm_runtime_get_noresume(&sdev->sdev_gendev); else if (ufshcd_is_rpm_autosuspend_allowed(hba)) sdev->rpm_autosuspend = 1; + /* + * Do not print messages during runtime PM to avoid never-ending cycles + * of messages written back to storage by user space causing runtime + * resume, causing more messages and so on. + */ + sdev->silence_suspend = 1; ufshcd_crypto_register(hba, q); @@ -7339,7 +7350,13 @@ static u32 ufshcd_find_max_sup_active_icc_level(struct ufs_hba *hba, if (!hba->vreg_info.vcc || !hba->vreg_info.vccq || !hba->vreg_info.vccq2) { - dev_err(hba->dev, + /* + * Using dev_dbg to avoid messages during runtime PM to avoid + * never-ending cycles of messages written back to storage by + * user space causing runtime resume, causing more messages and + * so on. 
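The sd.c and ufshcd changes in this stretch all serve the goal spelled out in the comments: a message logged during runtime PM can be written back to the storage device by userspace (e.g. syslog), which triggers a runtime resume, which logs again, and so on. UFS therefore marks its SCSI devices from ->slave_configure(), and sd.c honors the flag before printing. A condensed sketch of the guard, assuming the silence_suspend bit this series adds to struct scsi_device:

        #include <scsi/scsi_device.h>

        /* Host driver side: keep the SCSI disk driver quiet across PM. */
        static int demo_slave_configure(struct scsi_device *sdev)
        {
                sdev->silence_suspend = 1;
                return 0;
        }

        /* Suspend path side: print only when not silenced. */
        static void demo_suspend_notice(struct scsi_device *sdev)
        {
                if (!sdev->silence_suspend)
                        sdev_printk(KERN_NOTICE, sdev,
                                    "Synchronizing SCSI cache\n");
        }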
+ */ + dev_dbg(hba->dev, "%s: Regulator capability was not set, actvIccLevel=%d", __func__, icc_level); goto out; diff --git a/drivers/soc/mediatek/mtk-pm-domains.c b/drivers/soc/mediatek/mtk-pm-domains.c index b762bc40f56bd..afd2fd74802d2 100644 --- a/drivers/soc/mediatek/mtk-pm-domains.c +++ b/drivers/soc/mediatek/mtk-pm-domains.c @@ -443,6 +443,9 @@ generic_pm_domain *scpsys_add_one_domain(struct scpsys *scpsys, struct device_no pd->genpd.power_off = scpsys_power_off; pd->genpd.power_on = scpsys_power_on; + if (MTK_SCPD_CAPS(pd, MTK_SCPD_ACTIVE_WAKEUP)) + pd->genpd.flags |= GENPD_FLAG_ACTIVE_WAKEUP; + if (MTK_SCPD_CAPS(pd, MTK_SCPD_KEEP_DEFAULT_OFF)) pm_genpd_init(&pd->genpd, NULL, true); else diff --git a/drivers/soc/qcom/ocmem.c b/drivers/soc/qcom/ocmem.c index d2dacbbaafbd1..97fd24c178f8d 100644 --- a/drivers/soc/qcom/ocmem.c +++ b/drivers/soc/qcom/ocmem.c @@ -206,6 +206,7 @@ struct ocmem *of_get_ocmem(struct device *dev) ocmem = platform_get_drvdata(pdev); if (!ocmem) { dev_err(dev, "Cannot get ocmem\n"); + put_device(&pdev->dev); return ERR_PTR(-ENODEV); } return ocmem; diff --git a/drivers/soc/qcom/qcom_aoss.c b/drivers/soc/qcom/qcom_aoss.c index cbe5e39fdaeb0..a59bb34e5ebaf 100644 --- a/drivers/soc/qcom/qcom_aoss.c +++ b/drivers/soc/qcom/qcom_aoss.c @@ -451,7 +451,11 @@ struct qmp *qmp_get(struct device *dev) qmp = platform_get_drvdata(pdev); - return qmp ? qmp : ERR_PTR(-EPROBE_DEFER); + if (!qmp) { + put_device(&pdev->dev); + return ERR_PTR(-EPROBE_DEFER); + } + return qmp; } EXPORT_SYMBOL(qmp_get); @@ -497,7 +501,7 @@ static int qmp_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - ret = devm_request_irq(&pdev->dev, irq, qmp_intr, IRQF_ONESHOT, + ret = devm_request_irq(&pdev->dev, irq, qmp_intr, 0, "aoss-qmp", qmp); if (ret < 0) { dev_err(&pdev->dev, "failed to request interrupt\n"); diff --git a/drivers/soc/qcom/rpmpd.c b/drivers/soc/qcom/rpmpd.c index 0a8d8d24bfb77..624b5630feb87 100644 --- a/drivers/soc/qcom/rpmpd.c +++ b/drivers/soc/qcom/rpmpd.c @@ -610,6 +610,9 @@ static int rpmpd_probe(struct platform_device *pdev) data->domains = devm_kcalloc(&pdev->dev, num, sizeof(*data->domains), GFP_KERNEL); + if (!data->domains) + return -ENOMEM; + data->num_domains = num; for (i = 0; i < num; i++) { diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index 72386bd393fed..2f03ced0f4113 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -450,9 +450,9 @@ static int wkup_m3_ipc_probe(struct platform_device *pdev) return PTR_ERR(m3_ipc->ipc_mem_base); irq = platform_get_irq(pdev, 0); - if (!irq) { + if (irq < 0) { dev_err(&pdev->dev, "no irq resource\n"); - return -ENXIO; + return irq; } ret = devm_request_irq(dev, irq, wkup_m3_txev_handler, diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c index 0ca2a3e3a02e2..747983743a14b 100644 --- a/drivers/soundwire/dmi-quirks.c +++ b/drivers/soundwire/dmi-quirks.c @@ -59,7 +59,7 @@ static const struct dmi_system_id adr_remap_quirk_table[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Conv"), }, .driver_data = (void *)intel_tgl_bios, }, diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 122f7a29d8ca9..63101f1ba2713 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -448,8 +448,8 @@ static void intel_shim_wake(struct sdw_intel *sdw, bool wake_enable) /* Clear wake status */ wake_sts = 
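The ocmem and qcom_aoss fixes above are the same one-line leak: OF-based lookup helpers take a reference on the platform device, and the early-return path that finds no drvdata must drop it before bailing out. A generic sketch of the corrected lookup; demo_get is a hypothetical helper, not either driver's API:

        #include <linux/of_platform.h>
        #include <linux/platform_device.h>
        #include <linux/err.h>

        static void *demo_get(struct device_node *np)
        {
                struct platform_device *pdev = of_find_device_by_node(np);
                void *ctx;

                if (!pdev)
                        return ERR_PTR(-ENODEV);

                ctx = platform_get_drvdata(pdev);
                if (!ctx) {
                        /* Provider not probed yet: drop the reference taken
                         * by of_find_device_by_node() before returning. */
                        put_device(&pdev->dev);
                        return ERR_PTR(-EPROBE_DEFER);
                }
                return ctx;
        }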
intel_readw(shim, SDW_SHIM_WAKESTS); - wake_sts |= (SDW_SHIM_WAKEEN_ENABLE << link_id); - intel_writew(shim, SDW_SHIM_WAKESTS_STATUS, wake_sts); + wake_sts |= (SDW_SHIM_WAKESTS_STATUS << link_id); + intel_writew(shim, SDW_SHIM_WAKESTS, wake_sts); } mutex_unlock(sdw->link_res->shim_lock); } diff --git a/drivers/spi/spi-fsi.c b/drivers/spi/spi-fsi.c index b6c7467f0b590..d403a7a3021d0 100644 --- a/drivers/spi/spi-fsi.c +++ b/drivers/spi/spi-fsi.c @@ -25,6 +25,7 @@ #define SPI_FSI_BASE 0x70000 #define SPI_FSI_INIT_TIMEOUT_MS 1000 +#define SPI_FSI_STATUS_TIMEOUT_MS 100 #define SPI_FSI_MAX_RX_SIZE 8 #define SPI_FSI_MAX_TX_SIZE 40 @@ -299,6 +300,7 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx, struct spi_transfer *transfer) { int rc = 0; + unsigned long end; u64 status = 0ULL; if (transfer->tx_buf) { @@ -315,10 +317,14 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx, if (rc) return rc; + end = jiffies + msecs_to_jiffies(SPI_FSI_STATUS_TIMEOUT_MS); do { rc = fsi_spi_status(ctx, &status, "TX"); if (rc) return rc; + + if (time_after(jiffies, end)) + return -ETIMEDOUT; } while (status & SPI_FSI_STATUS_TDR_FULL); sent += nb; @@ -329,10 +335,14 @@ static int fsi_spi_transfer_data(struct fsi_spi *ctx, u8 *rx = transfer->rx_buf; while (transfer->len > recv) { + end = jiffies + msecs_to_jiffies(SPI_FSI_STATUS_TIMEOUT_MS); do { rc = fsi_spi_status(ctx, &status, "RX"); if (rc) return rc; + + if (time_after(jiffies, end)) + return -ETIMEDOUT; } while (!(status & SPI_FSI_STATUS_RDR_FULL)); rc = fsi_spi_read_reg(ctx, SPI_FSI_DATA_RX, &in); diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 753bd313e6fda..2ca19b01948a2 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -43,8 +43,11 @@ #define SPI_CFG1_PACKET_LOOP_OFFSET 8 #define SPI_CFG1_PACKET_LENGTH_OFFSET 16 #define SPI_CFG1_GET_TICK_DLY_OFFSET 29 +#define SPI_CFG1_GET_TICK_DLY_OFFSET_V1 30 #define SPI_CFG1_GET_TICK_DLY_MASK 0xe0000000 +#define SPI_CFG1_GET_TICK_DLY_MASK_V1 0xc0000000 + #define SPI_CFG1_CS_IDLE_MASK 0xff #define SPI_CFG1_PACKET_LOOP_MASK 0xff00 #define SPI_CFG1_PACKET_LENGTH_MASK 0x3ff0000 @@ -346,9 +349,15 @@ static int mtk_spi_prepare_message(struct spi_master *master, /* tick delay */ reg_val = readl(mdata->base + SPI_CFG1_REG); - reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK; - reg_val |= ((chip_config->tick_delay & 0x7) - << SPI_CFG1_GET_TICK_DLY_OFFSET); + if (mdata->dev_comp->enhance_timing) { + reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK; + reg_val |= ((chip_config->tick_delay & 0x7) + << SPI_CFG1_GET_TICK_DLY_OFFSET); + } else { + reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK_V1; + reg_val |= ((chip_config->tick_delay & 0x3) + << SPI_CFG1_GET_TICK_DLY_OFFSET_V1); + } writel(reg_val, mdata->base + SPI_CFG1_REG); /* set hw cs timing */ diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c index 45889947afed8..03fce4493aa79 100644 --- a/drivers/spi/spi-mxic.c +++ b/drivers/spi/spi-mxic.c @@ -304,25 +304,21 @@ static int mxic_spi_data_xfer(struct mxic_spi *mxic, const void *txbuf, writel(data, mxic->regs + TXD(nbytes % 4)); + ret = readl_poll_timeout(mxic->regs + INT_STS, sts, + sts & INT_TX_EMPTY, 0, USEC_PER_SEC); + if (ret) + return ret; + + ret = readl_poll_timeout(mxic->regs + INT_STS, sts, + sts & INT_RX_NOT_EMPTY, 0, + USEC_PER_SEC); + if (ret) + return ret; + + data = readl(mxic->regs + RXD); if (rxbuf) { - ret = readl_poll_timeout(mxic->regs + INT_STS, sts, - sts & INT_TX_EMPTY, 0, - USEC_PER_SEC); - if (ret) - return ret; - - ret = readl_poll_timeout(mxic->regs + INT_STS, sts, - 
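The spi-fsi change bounds what used to be unbounded status polling: take a jiffies deadline before entering the loop and convert expiry into -ETIMEDOUT via time_after(). It is the same jiffies arithmetic the qla2xxx heartbeat throttle earlier in this series uses with time_before(). The skeleton of the loop; the 100 ms budget mirrors the patch, while demo_read_status and DEMO_STATUS_BUSY are hypothetical stand-ins for the FSI register accessors:

        #include <linux/jiffies.h>
        #include <linux/errno.h>
        #include <linux/types.h>

        #define DEMO_STATUS_TIMEOUT_MS  100
        #define DEMO_STATUS_BUSY        BIT(0)          /* illustrative */

        static int demo_read_status(u64 *status);       /* hypothetical */

        static int demo_wait_not_busy(void)
        {
                unsigned long end = jiffies +
                                    msecs_to_jiffies(DEMO_STATUS_TIMEOUT_MS);
                u64 status;
                int rc;

                do {
                        rc = demo_read_status(&status);
                        if (rc)
                                return rc;
                        if (time_after(jiffies, end))
                                return -ETIMEDOUT;
                } while (status & DEMO_STATUS_BUSY);

                return 0;
        }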
sts & INT_RX_NOT_EMPTY, 0, - USEC_PER_SEC); - if (ret) - return ret; - - data = readl(mxic->regs + RXD); data >>= (8 * (4 - nbytes)); memcpy(rxbuf + pos, &data, nbytes); - WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY); - } else { - readl(mxic->regs + RXD); } WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY); diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 2e134eb4bd2c9..6502fda6243e0 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -76,14 +76,23 @@ static bool lpss_dma_filter(struct dma_chan *chan, void *param) return true; } +static void lpss_dma_put_device(void *dma_dev) +{ + pci_dev_put(dma_dev); +} + static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { struct pci_dev *dma_dev; + int ret; c->num_chipselect = 1; c->max_clk_rate = 50000000; dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); + ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev); + if (ret) + return ret; if (c->tx_param) { struct dw_dma_slave *slave = c->tx_param; @@ -107,8 +116,9 @@ static int lpss_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) { - struct pci_dev *dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0)); struct dw_dma_slave *tx, *rx; + struct pci_dev *dma_dev; + int ret; switch (PCI_FUNC(dev->devfn)) { case 0: @@ -133,6 +143,11 @@ static int mrfld_spi_setup(struct pci_dev *dev, struct pxa_spi_info *c) return -ENODEV; } + dma_dev = pci_get_slot(dev->bus, PCI_DEVFN(21, 0)); + ret = devm_add_action_or_reset(&dev->dev, lpss_dma_put_device, dma_dev); + if (ret) + return ret; + tx = c->tx_param; tx->dma_dev = &dma_dev->dev; diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index e9de1d958bbd2..8f345247a8c32 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -1352,6 +1352,10 @@ static int tegra_spi_probe(struct platform_device *pdev) tspi->phys = r->start; spi_irq = platform_get_irq(pdev, 0); + if (spi_irq < 0) { + ret = spi_irq; + goto exit_free_master; + } tspi->irq = spi_irq; tspi->clk = devm_clk_get(&pdev->dev, "spi"); diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 2a03739a0c609..80c3787deea9d 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1006,14 +1006,8 @@ static int tegra_slink_probe(struct platform_device *pdev) struct resource *r; int ret, spi_irq; const struct tegra_slink_chip_data *cdata = NULL; - const struct of_device_id *match; - match = of_match_device(tegra_slink_of_match, &pdev->dev); - if (!match) { - dev_err(&pdev->dev, "Error: No device match found\n"); - return -ENODEV; - } - cdata = match->data; + cdata = of_device_get_match_data(&pdev->dev); master = spi_alloc_master(&pdev->dev, sizeof(*tspi)); if (!master) { diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index ce1bdb4767ea3..cb00ac2fc7d8e 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -1240,6 +1240,8 @@ static int tegra_qspi_probe(struct platform_device *pdev) tqspi->phys = r->start; qspi_irq = platform_get_irq(pdev, 0); + if (qspi_irq < 0) + return qspi_irq; tqspi->irq = qspi_irq; tqspi->clk = devm_clk_get(&pdev->dev, "qspi"); diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index 328b6559bb19a..2b5afae8ff7fc 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1172,7 +1172,10 @@ 
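Both lpss_spi_setup() and mrfld_spi_setup() above register a devm action immediately after pci_get_slot(), so the reference is dropped automatically on probe failure and on unbind, and none of the error paths need a manual pci_dev_put(). The idiom, reduced to its core (demo names are illustrative):

        #include <linux/device.h>
        #include <linux/pci.h>

        static void demo_dma_put_device(void *data)
        {
                pci_dev_put(data);
        }

        static int demo_setup(struct pci_dev *dev)
        {
                struct pci_dev *dma_dev;
                int ret;

                dma_dev = pci_get_slot(dev->bus,
                                       PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
                /* Tie the reference to the consumer's lifetime.  If the
                 * registration itself fails, the action runs immediately
                 * and the reference is still dropped. */
                ret = devm_add_action_or_reset(&dev->dev,
                                               demo_dma_put_device, dma_dev);
                if (ret)
                        return ret;

                /* ... use dma_dev; no pci_dev_put() needed on any path. */
                return 0;
        }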
static int zynqmp_qspi_probe(struct platform_device *pdev) goto clk_dis_all; } - dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); + ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); + if (ret) + goto clk_dis_all; + ctlr->bits_per_word_mask = SPI_BPW_MASK(8); ctlr->num_chipselect = GQSPI_DEFAULT_NUM_CS; ctlr->mem_ops = &zynqmp_qspi_mem_ops; diff --git a/drivers/staging/iio/adc/ad7280a.c b/drivers/staging/iio/adc/ad7280a.c index fef0055b89909..20183b2ea1279 100644 --- a/drivers/staging/iio/adc/ad7280a.c +++ b/drivers/staging/iio/adc/ad7280a.c @@ -107,9 +107,9 @@ static unsigned int ad7280a_devaddr(unsigned int addr) { return ((addr & 0x1) << 4) | - ((addr & 0x2) << 3) | + ((addr & 0x2) << 2) | (addr & 0x4) | - ((addr & 0x8) >> 3) | + ((addr & 0x8) >> 2) | ((addr & 0x10) >> 4); } diff --git a/drivers/staging/media/atomisp/pci/atomisp_acc.c b/drivers/staging/media/atomisp/pci/atomisp_acc.c index 9a1751895ab03..28cb271663c47 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_acc.c +++ b/drivers/staging/media/atomisp/pci/atomisp_acc.c @@ -439,6 +439,18 @@ int atomisp_acc_s_mapped_arg(struct atomisp_sub_device *asd, return 0; } +static void atomisp_acc_unload_some_extensions(struct atomisp_sub_device *asd, + int i, + struct atomisp_acc_fw *acc_fw) +{ + while (--i >= 0) { + if (acc_fw->flags & acc_flag_to_pipe[i].flag) { + atomisp_css_unload_acc_extension(asd, acc_fw->fw, + acc_flag_to_pipe[i].pipe_id); + } + } +} + /* * Appends the loaded acceleration binary extensions to the * current ISP mode. Must be called just before sh_css_start(). @@ -479,16 +491,20 @@ int atomisp_acc_load_extensions(struct atomisp_sub_device *asd) acc_fw->fw, acc_flag_to_pipe[i].pipe_id, acc_fw->type); - if (ret) + if (ret) { + atomisp_acc_unload_some_extensions(asd, i, acc_fw); goto error; + } ext_loaded = true; } } ret = atomisp_css_set_acc_parameters(acc_fw); - if (ret < 0) + if (ret < 0) { + atomisp_acc_unload_some_extensions(asd, i, acc_fw); goto error; + } } if (!ext_loaded) @@ -497,6 +513,7 @@ int atomisp_acc_load_extensions(struct atomisp_sub_device *asd) ret = atomisp_css_update_stream(asd); if (ret) { dev_err(isp->dev, "%s: update stream failed.\n", __func__); + atomisp_acc_unload_extensions(asd); goto error; } @@ -504,13 +521,6 @@ int atomisp_acc_load_extensions(struct atomisp_sub_device *asd) return 0; error: - while (--i >= 0) { - if (acc_fw->flags & acc_flag_to_pipe[i].flag) { - atomisp_css_unload_acc_extension(asd, acc_fw->fw, - acc_flag_to_pipe[i].pipe_id); - } - } - list_for_each_entry_continue_reverse(acc_fw, &asd->acc.fw, list) { if (acc_fw->type != ATOMISP_ACC_FW_LOAD_TYPE_OUTPUT && acc_fw->type != ATOMISP_ACC_FW_LOAD_TYPE_VIEWFINDER) diff --git a/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c b/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c index 1cc581074ba76..9a194fbb305b7 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c +++ b/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c @@ -748,6 +748,21 @@ static int axp_regulator_set(struct device *dev, struct gmin_subdev *gs, return 0; } +/* + * Some boards contain a hw-bug where turning eldo2 back on after having turned + * it off causes the CPLM3218 ambient-light-sensor on the image-sensor's I2C bus + * to crash, hanging the bus. Do not turn eldo2 off on these systems. 
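The ad7280a fix in this stretch is a 5-bit reversal that was off by one shift: bit 1 must land in bit 3 (<< 2) and bit 3 in bit 1 (>> 2), whereas the old << 3 / >> 3 sent them to bits 4 and 0, colliding with the other terms. Writing the mapping out makes the symmetry visible (a sketch of the corrected transform, not the driver function itself):

        /* Reverse the low 5 bits of addr: bit 0 <-> bit 4, bit 1 <-> bit 3,
         * bit 2 stays put.  E.g. 0b00010 -> 0b01000, 0b01000 -> 0b00010. */
        static unsigned int demo_devaddr(unsigned int addr)
        {
                return ((addr & 0x01) << 4) |   /* bit 0 -> bit 4 */
                       ((addr & 0x02) << 2) |   /* bit 1 -> bit 3 */
                        (addr & 0x04)       |   /* bit 2 -> bit 2 */
                       ((addr & 0x08) >> 2) |   /* bit 3 -> bit 1 */
                       ((addr & 0x10) >> 4);    /* bit 4 -> bit 0 */
        }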
+ */ +static const struct dmi_system_id axp_leave_eldo2_on_ids[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TrekStor"), + DMI_MATCH(DMI_PRODUCT_NAME, "SurfTab duo W1 10.1 (VT4)"), + }, + }, + { } +}; + static int axp_v1p8_on(struct device *dev, struct gmin_subdev *gs) { int ret; @@ -782,6 +797,9 @@ static int axp_v1p8_off(struct device *dev, struct gmin_subdev *gs) if (ret) return ret; + if (dmi_check_system(axp_leave_eldo2_on_ids)) + return 0; + ret = axp_regulator_set(dev, gs, gs->eldo2_sel_reg, gs->eldo2_1p8v, ELDO_CTRL_REG, gs->eldo2_ctrl_shift, false); return ret; diff --git a/drivers/staging/media/atomisp/pci/hmm/hmm.c b/drivers/staging/media/atomisp/pci/hmm/hmm.c index 6a5ee46070898..c1cda16f2dc01 100644 --- a/drivers/staging/media/atomisp/pci/hmm/hmm.c +++ b/drivers/staging/media/atomisp/pci/hmm/hmm.c @@ -39,7 +39,7 @@ struct hmm_bo_device bo_device; struct hmm_pool dynamic_pool; struct hmm_pool reserved_pool; -static ia_css_ptr dummy_ptr; +static ia_css_ptr dummy_ptr = mmgr_EXCEPTION; static bool hmm_initialized; struct _hmm_mem_stat hmm_mem_stat; @@ -209,7 +209,7 @@ int hmm_init(void) void hmm_cleanup(void) { - if (!dummy_ptr) + if (dummy_ptr == mmgr_EXCEPTION) return; sysfs_remove_group(&atomisp_dev->kobj, atomisp_attribute_group); @@ -288,7 +288,8 @@ void hmm_free(ia_css_ptr virt) dev_dbg(atomisp_dev, "%s: free 0x%08x\n", __func__, virt); - WARN_ON(!virt); + if (WARN_ON(virt == mmgr_EXCEPTION)) + return; bo = hmm_bo_device_search_start(&bo_device, (unsigned int)virt); diff --git a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c index 1450013d3685d..c5d32048d90ff 100644 --- a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c +++ b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c @@ -23,7 +23,7 @@ static void hantro_h1_set_src_img_ctrl(struct hantro_dev *vpu, reg = H1_REG_IN_IMG_CTRL_ROW_LEN(pix_fmt->width) | H1_REG_IN_IMG_CTRL_OVRFLR_D4(0) - | H1_REG_IN_IMG_CTRL_OVRFLB_D4(0) + | H1_REG_IN_IMG_CTRL_OVRFLB(0) | H1_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt); vepu_write_relaxed(vpu, reg, H1_REG_IN_IMG_CTRL); } diff --git a/drivers/staging/media/hantro/hantro_h1_regs.h b/drivers/staging/media/hantro/hantro_h1_regs.h index d6e9825bb5c7b..30e7e7b920b55 100644 --- a/drivers/staging/media/hantro/hantro_h1_regs.h +++ b/drivers/staging/media/hantro/hantro_h1_regs.h @@ -47,7 +47,7 @@ #define H1_REG_IN_IMG_CTRL 0x03c #define H1_REG_IN_IMG_CTRL_ROW_LEN(x) ((x) << 12) #define H1_REG_IN_IMG_CTRL_OVRFLR_D4(x) ((x) << 10) -#define H1_REG_IN_IMG_CTRL_OVRFLB_D4(x) ((x) << 6) +#define H1_REG_IN_IMG_CTRL_OVRFLB(x) ((x) << 6) #define H1_REG_IN_IMG_CTRL_FMT(x) ((x) << 2) #define H1_REG_ENC_CTRL0 0x040 #define H1_REG_ENC_CTRL0_INIT_QP(x) ((x) << 26) diff --git a/drivers/staging/media/hantro/sunxi_vpu_hw.c b/drivers/staging/media/hantro/sunxi_vpu_hw.c index 90633406c4eb8..c0edd5856a0c8 100644 --- a/drivers/staging/media/hantro/sunxi_vpu_hw.c +++ b/drivers/staging/media/hantro/sunxi_vpu_hw.c @@ -29,10 +29,10 @@ static const struct hantro_fmt sunxi_vpu_dec_fmts[] = { .frmsize = { .min_width = 48, .max_width = 3840, - .step_width = MB_DIM, + .step_width = 32, .min_height = 48, .max_height = 2160, - .step_height = MB_DIM, + .step_height = 32, }, }, }; diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c index 2b73fa55c938b..9ea723bb5f209 100644 --- a/drivers/staging/media/imx/imx7-mipi-csis.c +++ b/drivers/staging/media/imx/imx7-mipi-csis.c @@ -32,7 +32,6 @@ #include #define CSIS_DRIVER_NAME 
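The atomisp change above keys its eldo2 workaround off a DMI match; the two-step pattern, a sentinel-terminated dmi_system_id table plus a dmi_check_system() call at the decision point, recurs across drivers in this series. A minimal instance, reusing the board strings from the patch (the demo_* names are illustrative):

        #include <linux/dmi.h>

        static const struct dmi_system_id demo_leave_eldo2_on_ids[] = {
                {
                        .matches = {
                                DMI_MATCH(DMI_SYS_VENDOR, "TrekStor"),
                                DMI_MATCH(DMI_PRODUCT_NAME,
                                          "SurfTab duo W1 10.1 (VT4)"),
                        },
                },
                { }     /* terminating entry */
        };

        static bool demo_keep_eldo2_on(void)
        {
                /* dmi_check_system() returns the number of matches. */
                return dmi_check_system(demo_leave_eldo2_on_ids);
        }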
"imx7-mipi-csis" -#define CSIS_SUBDEV_NAME CSIS_DRIVER_NAME #define CSIS_PAD_SINK 0 #define CSIS_PAD_SOURCE 1 @@ -311,7 +310,6 @@ struct csi_state { struct reset_control *mrst; struct regulator *mipi_phy_regulator; const struct mipi_csis_info *info; - u8 index; struct v4l2_subdev sd; struct media_pad pads[CSIS_PADS_NUM]; @@ -1303,8 +1301,8 @@ static int mipi_csis_subdev_init(struct csi_state *state) v4l2_subdev_init(sd, &mipi_csis_subdev_ops); sd->owner = THIS_MODULE; - snprintf(sd->name, sizeof(sd->name), "%s.%d", - CSIS_SUBDEV_NAME, state->index); + snprintf(sd->name, sizeof(sd->name), "csis-%s", + dev_name(state->dev)); sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE; sd->ctrl_handler = NULL; diff --git a/drivers/staging/media/imx/imx8mq-mipi-csi2.c b/drivers/staging/media/imx/imx8mq-mipi-csi2.c index 7adbdd14daa93..3b9fa75efac6b 100644 --- a/drivers/staging/media/imx/imx8mq-mipi-csi2.c +++ b/drivers/staging/media/imx/imx8mq-mipi-csi2.c @@ -398,9 +398,6 @@ static int imx8mq_mipi_csi_s_stream(struct v4l2_subdev *sd, int enable) struct csi_state *state = mipi_sd_to_csi2_state(sd); int ret = 0; - imx8mq_mipi_csi_write(state, CSI2RX_IRQ_MASK, - CSI2RX_IRQ_MASK_ULPS_STATUS_CHANGE); - if (enable) { ret = pm_runtime_resume_and_get(state->dev); if (ret < 0) @@ -696,7 +693,7 @@ static int imx8mq_mipi_csi_async_register(struct csi_state *state) * Suspend/resume */ -static int imx8mq_mipi_csi_pm_suspend(struct device *dev, bool runtime) +static int imx8mq_mipi_csi_pm_suspend(struct device *dev) { struct v4l2_subdev *sd = dev_get_drvdata(dev); struct csi_state *state = mipi_sd_to_csi2_state(sd); @@ -708,36 +705,21 @@ static int imx8mq_mipi_csi_pm_suspend(struct device *dev, bool runtime) imx8mq_mipi_csi_stop_stream(state); imx8mq_mipi_csi_clk_disable(state); state->state &= ~ST_POWERED; - if (!runtime) - state->state |= ST_SUSPENDED; } mutex_unlock(&state->lock); - ret = icc_set_bw(state->icc_path, 0, 0); - if (ret) - dev_err(dev, "icc_set_bw failed with %d\n", ret); - return ret ? 
-EAGAIN : 0; } -static int imx8mq_mipi_csi_pm_resume(struct device *dev, bool runtime) +static int imx8mq_mipi_csi_pm_resume(struct device *dev) { struct v4l2_subdev *sd = dev_get_drvdata(dev); struct csi_state *state = mipi_sd_to_csi2_state(sd); int ret = 0; - ret = icc_set_bw(state->icc_path, 0, state->icc_path_bw); - if (ret) { - dev_err(dev, "icc_set_bw failed with %d\n", ret); - return ret; - } - mutex_lock(&state->lock); - if (!runtime && !(state->state & ST_SUSPENDED)) - goto unlock; - if (!(state->state & ST_POWERED)) { state->state |= ST_POWERED; ret = imx8mq_mipi_csi_clk_enable(state); @@ -758,22 +740,60 @@ static int imx8mq_mipi_csi_pm_resume(struct device *dev, bool runtime) static int __maybe_unused imx8mq_mipi_csi_suspend(struct device *dev) { - return imx8mq_mipi_csi_pm_suspend(dev, false); + struct v4l2_subdev *sd = dev_get_drvdata(dev); + struct csi_state *state = mipi_sd_to_csi2_state(sd); + int ret; + + ret = imx8mq_mipi_csi_pm_suspend(dev); + if (ret) + return ret; + + state->state |= ST_SUSPENDED; + + return ret; } static int __maybe_unused imx8mq_mipi_csi_resume(struct device *dev) { - return imx8mq_mipi_csi_pm_resume(dev, false); + struct v4l2_subdev *sd = dev_get_drvdata(dev); + struct csi_state *state = mipi_sd_to_csi2_state(sd); + + if (!(state->state & ST_SUSPENDED)) + return 0; + + return imx8mq_mipi_csi_pm_resume(dev); } static int __maybe_unused imx8mq_mipi_csi_runtime_suspend(struct device *dev) { - return imx8mq_mipi_csi_pm_suspend(dev, true); + struct v4l2_subdev *sd = dev_get_drvdata(dev); + struct csi_state *state = mipi_sd_to_csi2_state(sd); + int ret; + + ret = imx8mq_mipi_csi_pm_suspend(dev); + if (ret) + return ret; + + ret = icc_set_bw(state->icc_path, 0, 0); + if (ret) + dev_err(dev, "icc_set_bw failed with %d\n", ret); + + return ret; } static int __maybe_unused imx8mq_mipi_csi_runtime_resume(struct device *dev) { - return imx8mq_mipi_csi_pm_resume(dev, true); + struct v4l2_subdev *sd = dev_get_drvdata(dev); + struct csi_state *state = mipi_sd_to_csi2_state(sd); + int ret; + + ret = icc_set_bw(state->icc_path, 0, state->icc_path_bw); + if (ret) { + dev_err(dev, "icc_set_bw failed with %d\n", ret); + return ret; + } + + return imx8mq_mipi_csi_pm_resume(dev); } static const struct dev_pm_ops imx8mq_mipi_csi_pm_ops = { @@ -921,7 +941,7 @@ static int imx8mq_mipi_csi_probe(struct platform_device *pdev) /* Enable runtime PM. 
*/ pm_runtime_enable(dev); if (!pm_runtime_enabled(dev)) { - ret = imx8mq_mipi_csi_pm_resume(dev, true); + ret = imx8mq_mipi_csi_runtime_resume(dev); if (ret < 0) goto icc; } @@ -934,7 +954,7 @@ static int imx8mq_mipi_csi_probe(struct platform_device *pdev) cleanup: pm_runtime_disable(&pdev->dev); - imx8mq_mipi_csi_pm_suspend(&pdev->dev, true); + imx8mq_mipi_csi_runtime_suspend(&pdev->dev); media_entity_cleanup(&state->sd.entity); v4l2_async_nf_unregister(&state->notifier); @@ -958,7 +978,7 @@ static int imx8mq_mipi_csi_remove(struct platform_device *pdev) v4l2_async_unregister_subdev(&state->sd); pm_runtime_disable(&pdev->dev); - imx8mq_mipi_csi_pm_suspend(&pdev->dev, true); + imx8mq_mipi_csi_runtime_suspend(&pdev->dev); media_entity_cleanup(&state->sd.entity); mutex_destroy(&state->lock); pm_runtime_set_suspended(&pdev->dev); diff --git a/drivers/staging/media/meson/vdec/esparser.c b/drivers/staging/media/meson/vdec/esparser.c index db7022707ff8d..86ccc8937afca 100644 --- a/drivers/staging/media/meson/vdec/esparser.c +++ b/drivers/staging/media/meson/vdec/esparser.c @@ -328,7 +328,12 @@ esparser_queue(struct amvdec_session *sess, struct vb2_v4l2_buffer *vbuf) offset = esparser_get_offset(sess); - amvdec_add_ts(sess, vb->timestamp, vbuf->timecode, offset, vbuf->flags); + ret = amvdec_add_ts(sess, vb->timestamp, vbuf->timecode, offset, vbuf->flags); + if (ret) { + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR); + return ret; + } + dev_dbg(core->dev, "esparser: ts = %llu pld_size = %u offset = %08X flags = %08X\n", vb->timestamp, payload_size, offset, vbuf->flags); diff --git a/drivers/staging/media/meson/vdec/vdec_helpers.c b/drivers/staging/media/meson/vdec/vdec_helpers.c index 203d7afa085d7..7d2a756532503 100644 --- a/drivers/staging/media/meson/vdec/vdec_helpers.c +++ b/drivers/staging/media/meson/vdec/vdec_helpers.c @@ -227,13 +227,16 @@ int amvdec_set_canvases(struct amvdec_session *sess, } EXPORT_SYMBOL_GPL(amvdec_set_canvases); -void amvdec_add_ts(struct amvdec_session *sess, u64 ts, - struct v4l2_timecode tc, u32 offset, u32 vbuf_flags) +int amvdec_add_ts(struct amvdec_session *sess, u64 ts, + struct v4l2_timecode tc, u32 offset, u32 vbuf_flags) { struct amvdec_timestamp *new_ts; unsigned long flags; new_ts = kzalloc(sizeof(*new_ts), GFP_KERNEL); + if (!new_ts) + return -ENOMEM; + new_ts->ts = ts; new_ts->tc = tc; new_ts->offset = offset; @@ -242,6 +245,7 @@ void amvdec_add_ts(struct amvdec_session *sess, u64 ts, spin_lock_irqsave(&sess->ts_spinlock, flags); list_add_tail(&new_ts->list, &sess->timestamps); spin_unlock_irqrestore(&sess->ts_spinlock, flags); + return 0; } EXPORT_SYMBOL_GPL(amvdec_add_ts); diff --git a/drivers/staging/media/meson/vdec/vdec_helpers.h b/drivers/staging/media/meson/vdec/vdec_helpers.h index 88137d15aa3ad..4bf3e61d081b3 100644 --- a/drivers/staging/media/meson/vdec/vdec_helpers.h +++ b/drivers/staging/media/meson/vdec/vdec_helpers.h @@ -56,8 +56,8 @@ void amvdec_dst_buf_done_offset(struct amvdec_session *sess, * @offset: offset in the VIFIFO where the associated packet was written * @flags: the vb2_v4l2_buffer flags */ -void amvdec_add_ts(struct amvdec_session *sess, u64 ts, - struct v4l2_timecode tc, u32 offset, u32 flags); +int amvdec_add_ts(struct amvdec_session *sess, u64 ts, + struct v4l2_timecode tc, u32 offset, u32 flags); void amvdec_remove_ts(struct amvdec_session *sess, u64 ts); /** diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c index b4173a8926d69..d8fb93035470e 100644 --- 
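The imx8mq-mipi-csi rework above replaces the shared suspend/resume helper and its bool runtime flag with distinct runtime and system-sleep entry points: the runtime pair owns the interconnect bandwidth votes, the system pair owns the ST_SUSPENDED bookkeeping, and both funnel into common power helpers. The resulting wiring, reduced to a skeleton in which every demo_* function stands in for a driver-specific step:

        #include <linux/pm.h>

        static int demo_power_down(struct device *dev);         /* common */
        static int demo_drop_bus_bandwidth(struct device *dev); /* runtime */
        static void demo_mark_suspended(struct device *dev);    /* sleep */
        static int demo_runtime_resume(struct device *dev);     /* mirrors */
        static int demo_resume(struct device *dev);             /* mirrors */

        static int demo_runtime_suspend(struct device *dev)
        {
                int ret = demo_power_down(dev);

                if (ret)
                        return ret;
                return demo_drop_bus_bandwidth(dev);    /* runtime-only */
        }

        static int demo_suspend(struct device *dev)
        {
                int ret = demo_power_down(dev);

                if (ret)
                        return ret;
                demo_mark_suspended(dev);               /* sleep-only */
                return 0;
        }

        static const struct dev_pm_ops demo_pm_ops = {
                SET_RUNTIME_PM_OPS(demo_runtime_suspend,
                                   demo_runtime_resume, NULL)
                SET_SYSTEM_SLEEP_PM_OPS(demo_suspend, demo_resume)
        };

Splitting the callbacks also lets probe and remove call the runtime variants directly when runtime PM is compiled out, exactly as the patch does.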
a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c @@ -38,7 +38,7 @@ struct cedrus_h264_sram_ref_pic { #define CEDRUS_H264_FRAME_NUM 18 -#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (16 * SZ_1K) +#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (32 * SZ_1K) #define CEDRUS_MIN_PIC_INFO_BUF_SIZE (130 * SZ_1K) static void cedrus_h264_write_sram(struct cedrus_dev *dev, diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c index 8829a7bab07ec..ffade5cbd2e40 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c @@ -23,7 +23,7 @@ * Subsequent BSP implementations seem to double the neighbor info buffer size * for the H6 SoC, which may be related to 10 bit H265 support. */ -#define CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE (397 * SZ_1K) +#define CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE (794 * SZ_1K) #define CEDRUS_H265_ENTRY_POINTS_BUF_SIZE (4 * SZ_1K) #define CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE 160 diff --git a/drivers/staging/media/zoran/zoran.h b/drivers/staging/media/zoran/zoran.h index b1ad2a2b914cd..50d5a7acfab6c 100644 --- a/drivers/staging/media/zoran/zoran.h +++ b/drivers/staging/media/zoran/zoran.h @@ -313,6 +313,6 @@ static inline struct zoran *to_zoran(struct v4l2_device *v4l2_dev) #endif -int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq); +int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq, int dir); void zoran_queue_exit(struct zoran *zr); int zr_set_buf(struct zoran *zr); diff --git a/drivers/staging/media/zoran/zoran_card.c b/drivers/staging/media/zoran/zoran_card.c index f259585b06897..11d415c0c05d2 100644 --- a/drivers/staging/media/zoran/zoran_card.c +++ b/drivers/staging/media/zoran/zoran_card.c @@ -803,6 +803,52 @@ int zoran_check_jpg_settings(struct zoran *zr, return 0; } +static int zoran_init_video_device(struct zoran *zr, struct video_device *video_dev, int dir) +{ + int err; + + /* Now add the template and register the device unit. */ + *video_dev = zoran_template; + video_dev->v4l2_dev = &zr->v4l2_dev; + video_dev->lock = &zr->lock; + video_dev->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE | dir; + + strscpy(video_dev->name, ZR_DEVNAME(zr), sizeof(video_dev->name)); + /* + * It's not a mem2mem device, but you can both capture and output from one and the same + * device. This should really be split up into two device nodes, but that's a job for + * another day. 
+ */ + video_dev->vfl_dir = VFL_DIR_M2M; + zoran_queue_init(zr, &zr->vq, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + err = video_register_device(video_dev, VFL_TYPE_VIDEO, video_nr[zr->id]); + if (err < 0) + return err; + video_set_drvdata(video_dev, zr); + return 0; +} + +static void zoran_exit_video_devices(struct zoran *zr) +{ + video_unregister_device(zr->video_dev); + kfree(zr->video_dev); +} + +static int zoran_init_video_devices(struct zoran *zr) +{ + int err; + + zr->video_dev = video_device_alloc(); + if (!zr->video_dev) + return -ENOMEM; + + err = zoran_init_video_device(zr, zr->video_dev, V4L2_CAP_VIDEO_CAPTURE); + if (err) + kfree(zr->video_dev); + return err; +} + void zoran_open_init_params(struct zoran *zr) { int i; @@ -874,17 +920,11 @@ static int zr36057_init(struct zoran *zr) zoran_open_init_params(zr); /* allocate memory *before* doing anything to the hardware in case allocation fails */ - zr->video_dev = video_device_alloc(); - if (!zr->video_dev) { - err = -ENOMEM; - goto exit; - } zr->stat_com = dma_alloc_coherent(&zr->pci_dev->dev, BUZ_NUM_STAT_COM * sizeof(u32), &zr->p_sc, GFP_KERNEL); if (!zr->stat_com) { - err = -ENOMEM; - goto exit_video; + return -ENOMEM; } for (j = 0; j < BUZ_NUM_STAT_COM; j++) zr->stat_com[j] = cpu_to_le32(1); /* mark as unavailable to zr36057 */ @@ -897,26 +937,9 @@ static int zr36057_init(struct zoran *zr) goto exit_statcom; } - /* Now add the template and register the device unit. */ - *zr->video_dev = zoran_template; - zr->video_dev->v4l2_dev = &zr->v4l2_dev; - zr->video_dev->lock = &zr->lock; - zr->video_dev->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_CAPTURE; - - strscpy(zr->video_dev->name, ZR_DEVNAME(zr), sizeof(zr->video_dev->name)); - /* - * It's not a mem2mem device, but you can both capture and output from one and the same - * device. This should really be split up into two device nodes, but that's a job for - * another day. 
- */ - zr->video_dev->vfl_dir = VFL_DIR_M2M; - - zoran_queue_init(zr, &zr->vq); - - err = video_register_device(zr->video_dev, VFL_TYPE_VIDEO, video_nr[zr->id]); - if (err < 0) + err = zoran_init_video_devices(zr); + if (err) goto exit_statcomb; - video_set_drvdata(zr->video_dev, zr); zoran_init_hardware(zr); if (!pass_through) { @@ -931,9 +954,6 @@ static int zr36057_init(struct zoran *zr) dma_free_coherent(&zr->pci_dev->dev, BUZ_NUM_STAT_COM * sizeof(u32) * 2, zr->stat_comb, zr->p_scb); exit_statcom: dma_free_coherent(&zr->pci_dev->dev, BUZ_NUM_STAT_COM * sizeof(u32), zr->stat_com, zr->p_sc); -exit_video: - kfree(zr->video_dev); -exit: return err; } @@ -965,7 +985,7 @@ static void zoran_remove(struct pci_dev *pdev) dma_free_coherent(&zr->pci_dev->dev, BUZ_NUM_STAT_COM * sizeof(u32) * 2, zr->stat_comb, zr->p_scb); pci_release_regions(pdev); pci_disable_device(zr->pci_dev); - video_unregister_device(zr->video_dev); + zoran_exit_video_devices(zr); exit_free: v4l2_ctrl_handler_free(&zr->hdl); v4l2_device_unregister(&zr->v4l2_dev); @@ -1069,8 +1089,10 @@ static int zoran_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) - return -ENODEV; - vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32)); + return err; + err = vb2_dma_contig_set_max_seg_size(&pdev->dev, U32_MAX); + if (err) + return err; nr = zoran_num++; if (nr >= BUZ_MAX) { diff --git a/drivers/staging/media/zoran/zoran_device.c b/drivers/staging/media/zoran/zoran_device.c index 5b12a730a2290..fb1f0465ca87f 100644 --- a/drivers/staging/media/zoran/zoran_device.c +++ b/drivers/staging/media/zoran/zoran_device.c @@ -814,7 +814,7 @@ static void zoran_reap_stat_com(struct zoran *zr) if (zr->jpg_settings.tmp_dcm == 1) i = (zr->jpg_dma_tail - zr->jpg_err_shift) & BUZ_MASK_STAT_COM; else - i = ((zr->jpg_dma_tail - zr->jpg_err_shift) & 1) * 2 + 1; + i = ((zr->jpg_dma_tail - zr->jpg_err_shift) & 1) * 2; stat_com = le32_to_cpu(zr->stat_com[i]); if ((stat_com & 1) == 0) { @@ -826,6 +826,11 @@ static void zoran_reap_stat_com(struct zoran *zr) size = (stat_com & GENMASK(22, 1)) >> 1; buf = zr->inuse[i]; + if (!buf) { + spin_unlock_irqrestore(&zr->queued_bufs_lock, flags); + pci_err(zr->pci_dev, "No buffer at slot %d\n", i); + return; + } buf->vbuf.vb2_buf.timestamp = ktime_get_ns(); if (zr->codec_mode == BUZ_MODE_MOTION_COMPRESS) { diff --git a/drivers/staging/media/zoran/zoran_driver.c b/drivers/staging/media/zoran/zoran_driver.c index 46382e43f1bf7..84665637ebb79 100644 --- a/drivers/staging/media/zoran/zoran_driver.c +++ b/drivers/staging/media/zoran/zoran_driver.c @@ -255,8 +255,6 @@ static int zoran_querycap(struct file *file, void *__fh, struct v4l2_capability strscpy(cap->card, ZR_DEVNAME(zr), sizeof(cap->card)); strscpy(cap->driver, "zoran", sizeof(cap->driver)); snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s", pci_name(zr->pci_dev)); - cap->device_caps = zr->video_dev->device_caps; - cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS; return 0; } @@ -582,6 +580,9 @@ static int zoran_s_std(struct file *file, void *__fh, v4l2_std_id std) struct zoran *zr = video_drvdata(file); int res = 0; + if (zr->norm == std) + return 0; + if (zr->running != ZORAN_MAP_MODE_NONE) return -EBUSY; @@ -739,6 +740,7 @@ static int zoran_g_parm(struct file *file, void *priv, struct v4l2_streamparm *p if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; + parm->parm.capture.readbuffers = 9; return 0; } @@ -869,6 +871,10 @@ int zr_set_buf(struct 
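The zoran refactor above pulls the open-coded template copy, registration, and drvdata setup out of zr36057_init() into zoran_init_video_device(), so a failed video_register_device() unwinds with a single kfree() at the caller instead of a dedicated exit label. The shape of the helper pair, condensed and with all zoran-specific configuration elided (demo_dev and demo_template are placeholders):

        #include <media/v4l2-dev.h>
        #include <linux/slab.h>

        static int demo_init_video_device(struct demo_dev *zr,
                                          struct video_device *vdev)
        {
                int err;

                *vdev = demo_template;          /* copy the static template */
                vdev->v4l2_dev = &zr->v4l2_dev;

                err = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
                if (err < 0)
                        return err;
                video_set_drvdata(vdev, zr);
                return 0;
        }

        static int demo_init_video_devices(struct demo_dev *zr)
        {
                int err;

                zr->video_dev = video_device_alloc();
                if (!zr->video_dev)
                        return -ENOMEM;

                err = demo_init_video_device(zr, zr->video_dev);
                if (err)
                        kfree(zr->video_dev);   /* never registered: plain free */
                return err;
        }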
zoran *zr) vbuf = &buf->vbuf; buf->vbuf.field = V4L2_FIELD_INTERLACED; + if (BUZ_MAX_HEIGHT < (zr->v4l_settings.height * 2)) + buf->vbuf.field = V4L2_FIELD_INTERLACED; + else + buf->vbuf.field = V4L2_FIELD_TOP; vb2_set_plane_payload(&buf->vbuf.vb2_buf, 0, zr->buffer_size); vb2_buffer_done(&buf->vbuf.vb2_buf, VB2_BUF_STATE_DONE); zr->inuse[0] = NULL; @@ -928,6 +934,7 @@ static int zr_vb2_start_streaming(struct vb2_queue *vq, unsigned int count) zr->stat_com[j] = cpu_to_le32(1); zr->inuse[j] = NULL; } + zr->vbseq = 0; if (zr->map_mode != ZORAN_MAP_MODE_RAW) { pci_info(zr->pci_dev, "START JPG\n"); @@ -1008,7 +1015,7 @@ static const struct vb2_ops zr_video_qops = { .wait_finish = vb2_ops_wait_finish, }; -int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq) +int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq, int dir) { int err; @@ -1016,8 +1023,9 @@ int zoran_queue_init(struct zoran *zr, struct vb2_queue *vq) INIT_LIST_HEAD(&zr->queued_bufs); vq->dev = &zr->pci_dev->dev; - vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - vq->io_modes = VB2_USERPTR | VB2_DMABUF | VB2_MMAP | VB2_READ | VB2_WRITE; + vq->type = dir; + + vq->io_modes = VB2_DMABUF | VB2_MMAP | VB2_READ | VB2_WRITE; vq->drv_priv = zr; vq->buf_struct_size = sizeof(struct zr_buffer); vq->ops = &zr_video_qops; diff --git a/drivers/staging/mt7621-dts/gbpc1.dts b/drivers/staging/mt7621-dts/gbpc1.dts index e38a083811e54..5ae94b1ad5998 100644 --- a/drivers/staging/mt7621-dts/gbpc1.dts +++ b/drivers/staging/mt7621-dts/gbpc1.dts @@ -12,7 +12,8 @@ memory@0 { device_type = "memory"; - reg = <0x0 0x1c000000>, <0x20000000 0x4000000>; + reg = <0x00000000 0x1c000000>, + <0x20000000 0x04000000>; }; chosen { @@ -38,24 +39,16 @@ gpio-leds { compatible = "gpio-leds"; - system { - label = "gb-pc1:green:system"; + power { + label = "green:power"; gpios = <&gpio 6 GPIO_ACTIVE_LOW>; + linux,default-trigger = "default-on"; }; - status { - label = "gb-pc1:green:status"; + system { + label = "green:system"; gpios = <&gpio 8 GPIO_ACTIVE_LOW>; - }; - - lan1 { - label = "gb-pc1:green:lan1"; - gpios = <&gpio 24 GPIO_ACTIVE_LOW>; - }; - - lan2 { - label = "gb-pc1:green:lan2"; - gpios = <&gpio 25 GPIO_ACTIVE_LOW>; + linux,default-trigger = "disk-activity"; }; }; }; @@ -95,9 +88,8 @@ partition@50000 { label = "firmware"; - reg = <0x50000 0x1FB0000>; + reg = <0x50000 0x1fb0000>; }; - }; }; @@ -106,9 +98,12 @@ }; &pinctrl { - state_default: pinctrl0 { - default_gpio: gpio { - groups = "wdt", "rgmii2", "uart3"; + pinctrl-names = "default"; + pinctrl-0 = <&state_default>; + + state_default: state-default { + gpio-pinmux { + groups = "rgmii2", "uart3", "wdt"; function = "gpio"; }; }; @@ -117,12 +112,13 @@ &switch0 { ports { port@0 { + status = "okay"; label = "ethblack"; - status = "ok"; }; + port@4 { + status = "okay"; label = "ethblue"; - status = "ok"; }; }; }; diff --git a/drivers/staging/mt7621-dts/gbpc2.dts b/drivers/staging/mt7621-dts/gbpc2.dts index 6fe603c7711d7..a7fce8de61472 100644 --- a/drivers/staging/mt7621-dts/gbpc2.dts +++ b/drivers/staging/mt7621-dts/gbpc2.dts @@ -1,22 +1,122 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /dts-v1/; -#include "gbpc1.dts" +#include "mt7621.dtsi" + +#include +#include / { compatible = "gnubee,gb-pc2", "mediatek,mt7621-soc"; model = "GB-PC2"; + + memory@0 { + device_type = "memory"; + reg = <0x00000000 0x1c000000>, + <0x20000000 0x04000000>; + }; + + chosen { + bootargs = "console=ttyS0,57600"; + }; + + palmbus: palmbus@1e000000 { + i2c@900 { + status = "okay"; + }; + }; + + gpio-keys { + compatible 
= "gpio-keys"; + + reset { + label = "reset"; + gpios = <&gpio 18 GPIO_ACTIVE_HIGH>; + linux,code = ; + }; + }; +}; + +&sdhci { + status = "okay"; +}; + +&spi0 { + status = "okay"; + + m25p80@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "jedec,spi-nor"; + reg = <0>; + spi-max-frequency = <50000000>; + broken-flash-reset; + + partition@0 { + label = "u-boot"; + reg = <0x0 0x30000>; + read-only; + }; + + partition@30000 { + label = "u-boot-env"; + reg = <0x30000 0x10000>; + read-only; + }; + + factory: partition@40000 { + label = "factory"; + reg = <0x40000 0x10000>; + read-only; + }; + + partition@50000 { + label = "firmware"; + reg = <0x50000 0x1fb0000>; + }; + }; }; -&default_gpio { - groups = "wdt", "uart3"; - function = "gpio"; +&pcie { + status = "okay"; }; -&gmac1 { - status = "ok"; +&pinctrl { + pinctrl-names = "default"; + pinctrl-0 = <&state_default>; + + state_default: state-default { + gpio-pinmux { + groups = "wdt"; + function = "gpio"; + }; + }; }; -&phy_external { - status = "ok"; +ðernet { + gmac1: mac@1 { + status = "okay"; + phy-handle = <ðphy7>; + }; + + mdio-bus { + ethphy7: ethernet-phy@7 { + reg = <7>; + phy-mode = "rgmii-rxid"; + }; + }; +}; + +&switch0 { + ports { + port@0 { + status = "okay"; + label = "ethblack"; + }; + + port@4 { + status = "okay"; + label = "ethblue"; + }; + }; }; diff --git a/drivers/staging/mt7621-dts/mt7621.dtsi b/drivers/staging/mt7621-dts/mt7621.dtsi index 644a65d1a6a16..786cdb5fc4da1 100644 --- a/drivers/staging/mt7621-dts/mt7621.dtsi +++ b/drivers/staging/mt7621-dts/mt7621.dtsi @@ -44,9 +44,9 @@ regulator-max-microvolt = <3300000>; enable-active-high; regulator-always-on; - }; + }; - mmc_fixed_1v8_io: fixedregulator@1 { + mmc_fixed_1v8_io: fixedregulator@1 { compatible = "regulator-fixed"; regulator-name = "mmc_io"; regulator-min-microvolt = <1800000>; @@ -325,37 +325,32 @@ mediatek,ethsys = <&sysc>; + pinctrl-names = "default"; + pinctrl-0 = <&mdio_pins>, <&rgmii1_pins>, <&rgmii2_pins>; gmac0: mac@0 { compatible = "mediatek,eth-mac"; reg = <0>; phy-mode = "rgmii"; + fixed-link { speed = <1000>; full-duplex; pause; }; }; + gmac1: mac@1 { compatible = "mediatek,eth-mac"; reg = <1>; status = "off"; phy-mode = "rgmii-rxid"; - phy-handle = <&phy_external>; }; + mdio-bus { #address-cells = <1>; #size-cells = <0>; - phy_external: ethernet-phy@5 { - status = "off"; - reg = <5>; - phy-mode = "rgmii-rxid"; - - pinctrl-names = "default"; - pinctrl-0 = <&rgmii2_pins>; - }; - switch0: switch0@0 { compatible = "mediatek,mt7621"; #address-cells = <1>; @@ -373,36 +368,43 @@ #address-cells = <1>; #size-cells = <0>; reg = <0>; + port@0 { status = "off"; reg = <0>; label = "lan0"; }; + port@1 { status = "off"; reg = <1>; label = "lan1"; }; + port@2 { status = "off"; reg = <2>; label = "lan2"; }; + port@3 { status = "off"; reg = <3>; label = "lan3"; }; + port@4 { status = "off"; reg = <4>; label = "lan4"; }; + port@6 { reg = <6>; label = "cpu"; ethernet = <&gmac0>; phy-mode = "trgmii"; + fixed-link { speed = <1000>; full-duplex; diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c index 9873bb2a9ee4f..113a3efd12e95 100644 --- a/drivers/staging/qlge/qlge_main.c +++ b/drivers/staging/qlge/qlge_main.c @@ -4605,14 +4605,12 @@ static int qlge_probe(struct pci_dev *pdev, err = register_netdev(ndev); if (err) { dev_err(&pdev->dev, "net device registration failed.\n"); - qlge_release_all(pdev); - pci_disable_device(pdev); - goto netdev_free; + goto cleanup_pdev; } err = qlge_health_create_reporters(qdev); if (err) - goto 
netdev_free; + goto unregister_netdev; /* Start up the timer to trigger EEH if * the bus goes dead @@ -4626,6 +4624,11 @@ static int qlge_probe(struct pci_dev *pdev, devlink_register(devlink); return 0; +unregister_netdev: + unregister_netdev(ndev); +cleanup_pdev: + qlge_release_all(pdev); + pci_disable_device(pdev); netdev_free: free_netdev(ndev); devlink_free: diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c index 51a13262a226f..d120d61454a35 100644 --- a/drivers/staging/r8188eu/core/rtw_recv.c +++ b/drivers/staging/r8188eu/core/rtw_recv.c @@ -1853,8 +1853,7 @@ static int recv_func(struct adapter *padapter, struct recv_frame *rframe) struct recv_frame *pending_frame; int cnt = 0; - pending_frame = rtw_alloc_recvframe(&padapter->recvpriv.uc_swdec_pending_queue); - while (pending_frame) { + while ((pending_frame = rtw_alloc_recvframe(&padapter->recvpriv.uc_swdec_pending_queue))) { cnt++; recv_func_posthandle(padapter, pending_frame); } diff --git a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c index b818872e0d194..31a9b7500a7b6 100644 --- a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c +++ b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c @@ -538,10 +538,10 @@ static int load_firmware(struct rt_firmware *pFirmware, struct device *device) } memcpy(pFirmware->szFwBuffer, fw->data, fw->size); pFirmware->ulFwLength = fw->size; - release_firmware(fw); dev_dbg(device, "!bUsedWoWLANFw, FmrmwareLen:%d+\n", pFirmware->ulFwLength); Exit: + release_firmware(fw); return rtStatus; } diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 4f478812cb514..a0b599100106b 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -53,7 +53,7 @@ struct int3400_thermal_priv { struct art *arts; int trt_count; struct trt *trts; - u8 uuid_bitmap; + u32 uuid_bitmap; int rel_misc_dev_res; int current_uuid_index; char *data_vault; @@ -468,6 +468,11 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv) priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer, obj->package.elements[0].buffer.length, GFP_KERNEL); + if (!priv->data_vault) { + kfree(buffer.pointer); + return; + } + bin_attr_data_vault.private = priv->data_vault; bin_attr_data_vault.size = obj->package.elements[0].buffer.length; kfree(buffer.pointer); diff --git a/drivers/tty/hvc/hvc_iucv.c b/drivers/tty/hvc/hvc_iucv.c index 82a76cac94deb..32366caca6623 100644 --- a/drivers/tty/hvc/hvc_iucv.c +++ b/drivers/tty/hvc/hvc_iucv.c @@ -1417,7 +1417,9 @@ static int __init hvc_iucv_init(void) */ static int __init hvc_iucv_config(char *val) { - return kstrtoul(val, 10, &hvc_iucv_devices); + if (kstrtoul(val, 10, &hvc_iucv_devices)) + pr_warn("hvc_iucv= invalid parameter value '%s'\n", val); + return 1; } diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c index c858aff721c41..fbb796f837532 100644 --- a/drivers/tty/mxser.c +++ b/drivers/tty/mxser.c @@ -744,6 +744,7 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) struct mxser_port *info = container_of(port, struct mxser_port, port); unsigned long page; unsigned long flags; + int ret; page = __get_free_page(GFP_KERNEL); if (!page) @@ -753,9 +754,9 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) if (!info->type) { set_bit(TTY_IO_ERROR, &tty->flags); - free_page(page); 
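The qlge_probe() fix above is the classic layered-unwind repair: once register_netdev() has succeeded, the health-reporter failure path must unregister it before releasing the PCI resources, and each label must undo exactly one prior step in reverse order. The canonical ladder, with demo placeholders for the individual steps:

        static int demo_probe(void)
        {
                int err;

                err = demo_step_a();            /* e.g. map resources */
                if (err)
                        return err;

                err = demo_step_b();            /* e.g. register_netdev() */
                if (err)
                        goto undo_a;

                err = demo_step_c();            /* e.g. create reporters */
                if (err)
                        goto undo_b;

                return 0;

        undo_b:
                demo_undo_b();                  /* unregister_netdev() */
        undo_a:
                demo_undo_a();                  /* release, disable device */
                return err;
        }

The original bug, jumping to a label that skipped the unregister, is the most common failure mode of this idiom and the reason the labels are conventionally named after the step they undo.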
spin_unlock_irqrestore(&info->slock, flags); - return 0; + ret = 0; + goto err_free_xmit; } info->port.xmit_buf = (unsigned char *) page; @@ -775,8 +776,10 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) if (capable(CAP_SYS_ADMIN)) { set_bit(TTY_IO_ERROR, &tty->flags); return 0; - } else - return -ENODEV; + } + + ret = -ENODEV; + goto err_free_xmit; } /* @@ -821,6 +824,10 @@ static int mxser_activate(struct tty_port *port, struct tty_struct *tty) spin_unlock_irqrestore(&info->slock, flags); return 0; +err_free_xmit: + free_page(page); + info->port.xmit_buf = NULL; + return ret; } /* diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 2350fb3bb5e4c..c2cecc6f47db4 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -487,7 +487,7 @@ static int aspeed_vuart_probe(struct platform_device *pdev) port.port.irq = irq_of_parse_and_map(np, 0); port.port.handle_irq = aspeed_vuart_handle_irq; port.port.iotype = UPIO_MEM; - port.port.type = PORT_16550A; + port.port.type = PORT_ASPEED_VUART; port.port.uartclk = clk; port.port.flags = UPF_SHARE_IRQ | UPF_BOOT_AUTOCONF | UPF_IOREMAP | UPF_FIXED_PORT | UPF_FIXED_TYPE | UPF_NO_THRE_TEST; diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index 890fa7ddaa7f3..b3c3f7e5851ab 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c @@ -64,10 +64,19 @@ int serial8250_tx_dma(struct uart_8250_port *p) struct uart_8250_dma *dma = p->dma; struct circ_buf *xmit = &p->port.state->xmit; struct dma_async_tx_descriptor *desc; + struct uart_port *up = &p->port; int ret; - if (dma->tx_running) + if (dma->tx_running) { + if (up->x_char) { + dmaengine_pause(dma->txchan); + uart_xchar_out(up, UART_TX); + dmaengine_resume(dma->txchan); + } return 0; + } else if (up->x_char) { + uart_xchar_out(up, UART_TX); + } if (uart_tx_stopped(&p->port) || uart_circ_empty(xmit)) { /* We have been called from __dma_tx_complete() */ diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c index d3bafec7619da..0f5af061e0b45 100644 --- a/drivers/tty/serial/8250/8250_lpss.c +++ b/drivers/tty/serial/8250/8250_lpss.c @@ -117,8 +117,7 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port) { struct dw_dma_slave *param = &lpss->dma_param; struct pci_dev *pdev = to_pci_dev(port->dev); - unsigned int dma_devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); - struct pci_dev *dma_dev = pci_get_slot(pdev->bus, dma_devfn); + struct pci_dev *dma_dev; switch (pdev->device) { case PCI_DEVICE_ID_INTEL_BYT_UART1: @@ -137,6 +136,8 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port) return -EINVAL; } + dma_dev = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); + param->dma_dev = &dma_dev->dev; param->m_master = 0; param->p_master = 1; @@ -152,6 +153,14 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port) return 0; } +static void byt_serial_exit(struct lpss8250 *lpss) +{ + struct dw_dma_slave *param = &lpss->dma_param; + + /* Paired with pci_get_slot() in the byt_serial_setup() above */ + put_device(param->dma_dev); +} + static int ehl_serial_setup(struct lpss8250 *lpss, struct uart_port *port) { struct uart_8250_dma *dma = &lpss->data.dma; @@ -170,6 +179,13 @@ static int ehl_serial_setup(struct lpss8250 *lpss, struct uart_port *port) return 0; } +static void ehl_serial_exit(struct lpss8250 *lpss) +{ + 
struct uart_8250_port *up = serial8250_get_port(lpss->data.line); + + up->dma = NULL; +} + #ifdef CONFIG_SERIAL_8250_DMA static const struct dw_dma_platform_data qrk_serial_dma_pdata = { .nr_channels = 2, @@ -344,8 +360,7 @@ static int lpss8250_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_exit: - if (lpss->board->exit) - lpss->board->exit(lpss); + lpss->board->exit(lpss); pci_free_irq_vectors(pdev); return ret; } @@ -356,8 +371,7 @@ static void lpss8250_remove(struct pci_dev *pdev) serial8250_unregister_port(lpss->data.line); - if (lpss->board->exit) - lpss->board->exit(lpss); + lpss->board->exit(lpss); pci_free_irq_vectors(pdev); } @@ -365,12 +379,14 @@ static const struct lpss8250_board byt_board = { .freq = 100000000, .base_baud = 2764800, .setup = byt_serial_setup, + .exit = byt_serial_exit, }; static const struct lpss8250_board ehl_board = { .freq = 200000000, .base_baud = 12500000, .setup = ehl_serial_setup, + .exit = ehl_serial_exit, }; static const struct lpss8250_board qrk_board = { diff --git a/drivers/tty/serial/8250/8250_mid.c b/drivers/tty/serial/8250/8250_mid.c index efa0515139f8e..e6c1791609ddf 100644 --- a/drivers/tty/serial/8250/8250_mid.c +++ b/drivers/tty/serial/8250/8250_mid.c @@ -73,6 +73,11 @@ static int pnw_setup(struct mid8250 *mid, struct uart_port *p) return 0; } +static void pnw_exit(struct mid8250 *mid) +{ + pci_dev_put(mid->dma_dev); +} + static int tng_handle_irq(struct uart_port *p) { struct mid8250 *mid = p->private_data; @@ -124,6 +129,11 @@ static int tng_setup(struct mid8250 *mid, struct uart_port *p) return 0; } +static void tng_exit(struct mid8250 *mid) +{ + pci_dev_put(mid->dma_dev); +} + static int dnv_handle_irq(struct uart_port *p) { struct mid8250 *mid = p->private_data; @@ -330,9 +340,9 @@ static int mid8250_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, mid); return 0; + err: - if (mid->board->exit) - mid->board->exit(mid); + mid->board->exit(mid); return ret; } @@ -342,8 +352,7 @@ static void mid8250_remove(struct pci_dev *pdev) serial8250_unregister_port(mid->line); - if (mid->board->exit) - mid->board->exit(mid); + mid->board->exit(mid); } static const struct mid8250_board pnw_board = { @@ -351,6 +360,7 @@ static const struct mid8250_board pnw_board = { .freq = 50000000, .base_baud = 115200, .setup = pnw_setup, + .exit = pnw_exit, }; static const struct mid8250_board tng_board = { @@ -358,6 +368,7 @@ static const struct mid8250_board tng_board = { .freq = 38400000, .base_baud = 1843200, .setup = tng_setup, + .exit = tng_exit, }; static const struct mid8250_board dnv_board = { diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 3b12bfc1ed67b..9f116e75956e2 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -307,6 +307,14 @@ static const struct serial8250_config uart_config[] = { .rxtrig_bytes = {1, 32, 64, 112}, .flags = UART_CAP_FIFO | UART_CAP_SLEEP, }, + [PORT_ASPEED_VUART] = { + .name = "ASPEED VUART", + .fifo_size = 16, + .tx_loadsz = 16, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, + .rxtrig_bytes = {1, 4, 8, 14}, + .flags = UART_CAP_FIFO, + }, }; /* Uart divisor latch read */ @@ -1615,6 +1623,18 @@ static inline void start_tx_rs485(struct uart_port *port) struct uart_8250_port *up = up_to_u8250p(port); struct uart_8250_em485 *em485 = up->em485; + /* + * While serial8250_em485_handle_stop_tx() is a noop if + * em485->active_timer != &em485->stop_tx_timer, it might happen that + * the 
timer is still armed and triggers only after the current bunch of + * chars is sent and em485->active_timer == &em485->stop_tx_timer again. + * So cancel the timer. There is still a theoretical race condition if + * the timer is already running and only comes around to check for + * em485->active_timer when &em485->stop_tx_timer is armed again. + */ + if (em485->active_timer == &em485->stop_tx_timer) + hrtimer_try_to_cancel(&em485->stop_tx_timer); + em485->active_timer = NULL; if (em485->tx_stopped) { @@ -1799,9 +1819,7 @@ void serial8250_tx_chars(struct uart_8250_port *up) int count; if (port->x_char) { - serial_out(up, UART_TX, port->x_char); - port->icount.tx++; - port->x_char = 0; + uart_xchar_out(port, UART_TX); return; } if (uart_tx_stopped(port)) { diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 49d0c7f2b29b8..79b7db8580e05 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -403,16 +403,16 @@ static int kgdboc_option_setup(char *opt) { if (!opt) { pr_err("config string not provided\n"); - return -EINVAL; + return 1; } if (strlen(opt) >= MAX_CONFIG_LEN) { pr_err("config string too long\n"); - return -ENOSPC; + return 1; } strcpy(config, opt); - return 0; + return 1; } __setup("kgdboc=", kgdboc_option_setup); diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 0db90be4c3bc3..f67540ae2a883 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -644,6 +644,20 @@ static void uart_flush_buffer(struct tty_struct *tty) tty_port_tty_wakeup(&state->port); } +/* + * This function performs a low-level write of the high-priority XON/XOFF + * character and accounts for it. + * + * Requires uart_port to implement .serial_out(). + */ +void uart_xchar_out(struct uart_port *uport, int offset) +{ + serial_port_out(uport, offset, uport->x_char); + uport->icount.tx++; + uport->x_char = 0; +} +EXPORT_SYMBOL_GPL(uart_xchar_out); + /* * This function is used to send a high-priority XON/XOFF character to * the device diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index df3522dab31b5..1e7dc130c39a6 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -762,7 +762,7 @@ static int xhci_exit_test_mode(struct xhci_hcd *xhci) } pm_runtime_allow(xhci_to_hcd(xhci)->self.controller); xhci->test_mode = 0; - return xhci_reset(xhci); + return xhci_reset(xhci, XHCI_RESET_SHORT_USEC); } void xhci_set_link_state(struct xhci_hcd *xhci, struct xhci_port *port, @@ -1088,6 +1088,9 @@ static void xhci_get_usb2_port_status(struct xhci_port *port, u32 *status, if (link_state == XDEV_U2) *status |= USB_PORT_STAT_L1; if (link_state == XDEV_U0) { + if (bus_state->resume_done[portnum]) + usb_hcd_end_port_resume(&port->rhub->hcd->self, + portnum); bus_state->resume_done[portnum] = 0; clear_bit(portnum, &bus_state->resuming_ports); if (bus_state->suspended_ports & (1 << portnum)) { diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 0e312066c5c63..b398d3fdabf61 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2583,7 +2583,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) fail: xhci_halt(xhci); - xhci_reset(xhci); + xhci_reset(xhci, XHCI_RESET_SHORT_USEC); xhci_mem_cleanup(xhci); return -ENOMEM; } diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 2d378543bc3aa..7d1ad8d654cbb 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -65,7 +65,7 @@ static bool td_on_ring(struct
xhci_td *td, struct xhci_ring *ring) * handshake done). There are two failure modes: "usec" have passed (major * hardware flakeout), or the register reads as all-ones (hardware removed). */ -int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec) +int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us) { u32 result; int ret; @@ -73,7 +73,7 @@ int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec) ret = readl_poll_timeout_atomic(ptr, result, (result & mask) == done || result == U32_MAX, - 1, usec); + 1, timeout_us); if (result == U32_MAX) /* card removed */ return -ENODEV; @@ -162,7 +162,7 @@ int xhci_start(struct xhci_hcd *xhci) * Transactions will be terminated immediately, and operational registers * will be set to their defaults. */ -int xhci_reset(struct xhci_hcd *xhci) +int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us) { u32 command; u32 state; @@ -195,8 +195,7 @@ int xhci_reset(struct xhci_hcd *xhci) if (xhci->quirks & XHCI_INTEL_HOST) udelay(1000); - ret = xhci_handshake(&xhci->op_regs->command, - CMD_RESET, 0, 10 * 1000 * 1000); + ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, timeout_us); if (ret) return ret; @@ -209,8 +208,7 @@ int xhci_reset(struct xhci_hcd *xhci) * xHCI cannot write to any doorbells or operational registers other * than status until the "Controller Not Ready" flag is cleared. */ - ret = xhci_handshake(&xhci->op_regs->status, - STS_CNR, 0, 10 * 1000 * 1000); + ret = xhci_handshake(&xhci->op_regs->status, STS_CNR, 0, timeout_us); xhci->usb2_rhub.bus_state.port_c_suspend = 0; xhci->usb2_rhub.bus_state.suspended_ports = 0; @@ -731,7 +729,7 @@ static void xhci_stop(struct usb_hcd *hcd) xhci->xhc_state |= XHCI_STATE_HALTED; xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; xhci_halt(xhci); - xhci_reset(xhci); + xhci_reset(xhci, XHCI_RESET_SHORT_USEC); spin_unlock_irq(&xhci->lock); xhci_cleanup_msix(xhci); @@ -784,7 +782,7 @@ void xhci_shutdown(struct usb_hcd *hcd) xhci_halt(xhci); /* Workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) - xhci_reset(xhci); + xhci_reset(xhci, XHCI_RESET_SHORT_USEC); spin_unlock_irq(&xhci->lock); xhci_cleanup_msix(xhci); @@ -1170,7 +1168,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) xhci_dbg(xhci, "Stop HCD\n"); xhci_halt(xhci); xhci_zero_64b_regs(xhci); - retval = xhci_reset(xhci); + retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); spin_unlock_irq(&xhci->lock); if (retval) return retval; @@ -5316,7 +5314,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) xhci_dbg(xhci, "Resetting HCD\n"); /* Reset the internal HC memory state and registers. */ - retval = xhci_reset(xhci); + retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); if (retval) return retval; xhci_dbg(xhci, "Reset complete\n"); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 5a75fe5631238..bc0789229527f 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -229,6 +229,9 @@ struct xhci_op_regs { #define CMD_ETE (1 << 14) /* bits 15:31 are reserved (and should be preserved on writes). 
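For reference, the timeout plumbing above terminates in xhci_handshake(), which is a bounded poll: succeed once (value & mask) == done, give up with -ENODEV when the register reads all-ones (hardware removed), or time out after timeout_us microseconds. A minimal userspace sketch of the same pattern, with fake_reg, read_reg() and now_us() as stand-ins for the MMIO register and the kernel's poll helpers:

    #include <stdint.h>
    #include <stdio.h>
    #include <errno.h>
    #include <time.h>

    static uint32_t fake_reg;               /* stands in for the op reg */

    static uint32_t read_reg(void) { return fake_reg; }

    static uint64_t now_us(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
    }

    /* Poll until (reg & mask) == done, the device vanishes, or the
     * deadline passes. */
    static int handshake(uint32_t mask, uint32_t done, uint64_t timeout_us)
    {
        uint64_t deadline = now_us() + timeout_us;

        for (;;) {
            uint32_t val = read_reg();

            if (val == UINT32_MAX)      /* all-ones: hardware removed */
                return -ENODEV;
            if ((val & mask) == done)   /* handshake complete */
                return 0;
            if (now_us() > deadline)
                return -ETIMEDOUT;
        }
    }

    int main(void)
    {
        fake_reg = 0;   /* CMD_RESET already clear: succeeds at once */
        printf("%d\n", handshake(1u << 1, 0, 250 * 1000));
        return 0;
    }

The XHCI_RESET_LONG_USEC/XHCI_RESET_SHORT_USEC split that follows keeps the conservative 10-second budget for initial setup and resume while bounding the remaining reset call sites to 250 ms.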
*/ +#define XHCI_RESET_LONG_USEC (10 * 1000 * 1000) +#define XHCI_RESET_SHORT_USEC (250 * 1000) + /* IMAN - Interrupt Management Register */ #define IMAN_IE (1 << 1) #define IMAN_IP (1 << 0) @@ -2083,11 +2086,11 @@ void xhci_free_container_ctx(struct xhci_hcd *xhci, /* xHCI host controller glue */ typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *); -int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec); +int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us); void xhci_quiesce(struct xhci_hcd *xhci); int xhci_halt(struct xhci_hcd *xhci); int xhci_start(struct xhci_hcd *xhci); -int xhci_reset(struct xhci_hcd *xhci); +int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us); int xhci_run(struct usb_hcd *hcd); int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks); void xhci_shutdown(struct usb_hcd *hcd); @@ -2467,6 +2470,8 @@ static inline const char *xhci_decode_ctrl_ctx(char *str, unsigned int bit; int ret = 0; + str[0] = '\0'; + if (drop) { ret = sprintf(str, "Drop:"); for_each_set_bit(bit, &drop, 32) @@ -2624,8 +2629,11 @@ static inline const char *xhci_decode_usbsts(char *str, u32 usbsts) { int ret = 0; + ret = sprintf(str, " 0x%08x", usbsts); + if (usbsts == ~(u32)0) - return " 0xffffffff"; + return str; + if (usbsts & STS_HALT) ret += sprintf(str + ret, " HCHalted"); if (usbsts & STS_FATAL) diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index de5c012570603..ef8d1c73c7545 100644 --- a/drivers/usb/serial/Kconfig +++ b/drivers/usb/serial/Kconfig @@ -66,6 +66,7 @@ config USB_SERIAL_SIMPLE - Libtransistor USB console - a number of Motorola phones - Motorola Tetra devices + - Nokia mobile phones - Novatel Wireless GPS receivers - Siemens USB/MPI adapter. - ViVOtech ViVOpay USB device. diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index a70fd86f735ca..88b284d61681a 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -116,6 +116,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530GC_PRODUCT_ID) }, { USB_DEVICE(SMART_VENDOR_ID, SMART_PRODUCT_ID) }, { USB_DEVICE(AT_VENDOR_ID, AT_VTKIT3_PRODUCT_ID) }, + { USB_DEVICE(IBM_VENDOR_ID, IBM_PRODUCT_ID) }, { } /* Terminating entry */ }; @@ -435,6 +436,7 @@ static int pl2303_detect_type(struct usb_serial *serial) case 0x105: case 0x305: case 0x405: + case 0x605: /* * Assume it's an HXN-type if the device doesn't * support the old read request value. 
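The new 0x605 case extends the set of bcdDevice values that pl2303_detect_type() treats as HXN candidates; per the comment above, the driver then assumes an HXN-type part when the old read request is not supported. A reduced sketch of that dispatch; the enum and helper are local stand-ins, not the driver's own definitions:

    #include <stdio.h>

    enum chip_type { TYPE_UNKNOWN, TYPE_HXN };      /* local stand-ins */

    /* Map a bcdDevice value to a chip family, per the switch above. */
    static enum chip_type detect_type(unsigned short bcd_device)
    {
        switch (bcd_device) {
        case 0x105:
        case 0x305:
        case 0x405:
        case 0x605:
            /* Assumed HXN when the old read request is unsupported. */
            return TYPE_HXN;
        default:
            return TYPE_UNKNOWN;
        }
    }

    int main(void)
    {
        printf("0x605 -> %s\n",
               detect_type(0x605) == TYPE_HXN ? "HXN" : "unknown");
        return 0;
    }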
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 6097ee8fccb25..c5406452b774e 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -35,6 +35,9 @@ #define ATEN_PRODUCT_UC232B 0x2022 #define ATEN_PRODUCT_ID2 0x2118 +#define IBM_VENDOR_ID 0x04b3 +#define IBM_PRODUCT_ID 0x4016 + #define IODATA_VENDOR_ID 0x04bb #define IODATA_PRODUCT_ID 0x0a03 #define IODATA_PRODUCT_ID_RSAQ5 0x0a0e diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index bd23a7cb1be2b..4c6747889a194 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -91,6 +91,11 @@ DEVICE(moto_modem, MOTO_IDS); { USB_DEVICE(0x0cad, 0x9016) } /* TPG2200 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); +/* Nokia mobile phone driver */ +#define NOKIA_IDS() \ + { USB_DEVICE(0x0421, 0x069a) } /* Nokia 130 (RM-1035) */ +DEVICE(nokia, NOKIA_IDS); + /* Novatel Wireless GPS driver */ #define NOVATEL_IDS() \ { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */ @@ -123,6 +128,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &vivopay_device, &moto_modem_device, &motorola_tetra_device, + &nokia_device, &novatel_gps_device, &hp4x_device, &suunto_device, @@ -140,6 +146,7 @@ static const struct usb_device_id id_table[] = { VIVOPAY_IDS(), MOTO_IDS(), MOTOROLA_TETRA_IDS(), + NOKIA_IDS(), NOVATEL_IDS(), HP4X_IDS(), SUUNTO_IDS(), diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c index 5f7d678502be4..6012603f3630e 100644 --- a/drivers/usb/storage/ene_ub6250.c +++ b/drivers/usb/storage/ene_ub6250.c @@ -237,36 +237,33 @@ static struct us_unusual_dev ene_ub6250_unusual_dev_list[] = { #define memstick_logaddr(logadr1, logadr0) ((((u16)(logadr1)) << 8) | (logadr0)) -struct SD_STATUS { - u8 Insert:1; - u8 Ready:1; - u8 MediaChange:1; - u8 IsMMC:1; - u8 HiCapacity:1; - u8 HiSpeed:1; - u8 WtP:1; - u8 Reserved:1; -}; - -struct MS_STATUS { - u8 Insert:1; - u8 Ready:1; - u8 MediaChange:1; - u8 IsMSPro:1; - u8 IsMSPHG:1; - u8 Reserved1:1; - u8 WtP:1; - u8 Reserved2:1; -}; - -struct SM_STATUS { - u8 Insert:1; - u8 Ready:1; - u8 MediaChange:1; - u8 Reserved:3; - u8 WtP:1; - u8 IsMS:1; -}; +/* SD_STATUS bits */ +#define SD_Insert BIT(0) +#define SD_Ready BIT(1) +#define SD_MediaChange BIT(2) +#define SD_IsMMC BIT(3) +#define SD_HiCapacity BIT(4) +#define SD_HiSpeed BIT(5) +#define SD_WtP BIT(6) + /* Bit 7 reserved */ + +/* MS_STATUS bits */ +#define MS_Insert BIT(0) +#define MS_Ready BIT(1) +#define MS_MediaChange BIT(2) +#define MS_IsMSPro BIT(3) +#define MS_IsMSPHG BIT(4) + /* Bit 5 reserved */ +#define MS_WtP BIT(6) + /* Bit 7 reserved */ + +/* SM_STATUS bits */ +#define SM_Insert BIT(0) +#define SM_Ready BIT(1) +#define SM_MediaChange BIT(2) + /* Bits 3-5 reserved */ +#define SM_WtP BIT(6) +#define SM_IsMS BIT(7) struct ms_bootblock_cis { u8 bCistplDEVICE[6]; /* 0 */ @@ -437,9 +434,9 @@ struct ene_ub6250_info { u8 *bbuf; /* for 6250 code */ - struct SD_STATUS SD_Status; - struct MS_STATUS MS_Status; - struct SM_STATUS SM_Status; + u8 SD_Status; + u8 MS_Status; + u8 SM_Status; /* ----- SD Control Data ---------------- */ /*SD_REGISTER SD_Regs; */ @@ -602,7 +599,7 @@ static int sd_scsi_test_unit_ready(struct us_data *us, struct scsi_cmnd *srb) { struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; - if (info->SD_Status.Insert && info->SD_Status.Ready) + if ((info->SD_Status & SD_Insert) && (info->SD_Status & SD_Ready)) return USB_STOR_TRANSPORT_GOOD; else { 
ene_sd_init(us); @@ -622,7 +619,7 @@ static int sd_scsi_mode_sense(struct us_data *us, struct scsi_cmnd *srb) 0x0b, 0x00, 0x80, 0x08, 0x00, 0x00, 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 }; - if (info->SD_Status.WtP) + if (info->SD_Status & SD_WtP) usb_stor_set_xfer_buf(mediaWP, 12, srb); else usb_stor_set_xfer_buf(mediaNoWP, 12, srb); @@ -641,9 +638,9 @@ static int sd_scsi_read_capacity(struct us_data *us, struct scsi_cmnd *srb) struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; usb_stor_dbg(us, "sd_scsi_read_capacity\n"); - if (info->SD_Status.HiCapacity) { + if (info->SD_Status & SD_HiCapacity) { bl_len = 0x200; - if (info->SD_Status.IsMMC) + if (info->SD_Status & SD_IsMMC) bl_num = info->HC_C_SIZE-1; else bl_num = (info->HC_C_SIZE + 1) * 1024 - 1; @@ -693,7 +690,7 @@ static int sd_scsi_read(struct us_data *us, struct scsi_cmnd *srb) return USB_STOR_TRANSPORT_ERROR; } - if (info->SD_Status.HiCapacity) + if (info->SD_Status & SD_HiCapacity) bnByte = bn; /* set up the command wrapper */ @@ -733,7 +730,7 @@ static int sd_scsi_write(struct us_data *us, struct scsi_cmnd *srb) return USB_STOR_TRANSPORT_ERROR; } - if (info->SD_Status.HiCapacity) + if (info->SD_Status & SD_HiCapacity) bnByte = bn; /* set up the command wrapper */ @@ -1456,7 +1453,7 @@ static int ms_scsi_test_unit_ready(struct us_data *us, struct scsi_cmnd *srb) struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); /* pr_info("MS_SCSI_Test_Unit_Ready\n"); */ - if (info->MS_Status.Insert && info->MS_Status.Ready) { + if ((info->MS_Status & MS_Insert) && (info->MS_Status & MS_Ready)) { return USB_STOR_TRANSPORT_GOOD; } else { ene_ms_init(us); @@ -1476,7 +1473,7 @@ static int ms_scsi_mode_sense(struct us_data *us, struct scsi_cmnd *srb) 0x0b, 0x00, 0x80, 0x08, 0x00, 0x00, 0x71, 0xc0, 0x00, 0x00, 0x02, 0x00 }; - if (info->MS_Status.WtP) + if (info->MS_Status & MS_WtP) usb_stor_set_xfer_buf(mediaWP, 12, srb); else usb_stor_set_xfer_buf(mediaNoWP, 12, srb); @@ -1495,7 +1492,7 @@ static int ms_scsi_read_capacity(struct us_data *us, struct scsi_cmnd *srb) usb_stor_dbg(us, "ms_scsi_read_capacity\n"); bl_len = 0x200; - if (info->MS_Status.IsMSPro) + if (info->MS_Status & MS_IsMSPro) bl_num = info->MSP_TotalBlock - 1; else bl_num = info->MS_Lib.NumberOfLogBlock * info->MS_Lib.blockSize * 2 - 1; @@ -1650,7 +1647,7 @@ static int ms_scsi_read(struct us_data *us, struct scsi_cmnd *srb) if (bn > info->bl_num) return USB_STOR_TRANSPORT_ERROR; - if (info->MS_Status.IsMSPro) { + if (info->MS_Status & MS_IsMSPro) { result = ene_load_bincode(us, MSP_RW_PATTERN); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Load MPS RW pattern Fail !!\n"); @@ -1751,7 +1748,7 @@ static int ms_scsi_write(struct us_data *us, struct scsi_cmnd *srb) if (bn > info->bl_num) return USB_STOR_TRANSPORT_ERROR; - if (info->MS_Status.IsMSPro) { + if (info->MS_Status & MS_IsMSPro) { result = ene_load_bincode(us, MSP_RW_PATTERN); if (result != USB_STOR_XFER_GOOD) { pr_info("Load MSP RW pattern Fail !!\n"); @@ -1859,12 +1856,12 @@ static int ene_get_card_status(struct us_data *us, u8 *buf) tmpreg = (u16) reg4b; reg4b = *(u32 *)(&buf[0x14]); - if (info->SD_Status.HiCapacity && !info->SD_Status.IsMMC) + if ((info->SD_Status & SD_HiCapacity) && !(info->SD_Status & SD_IsMMC)) info->HC_C_SIZE = (reg4b >> 8) & 0x3fffff; info->SD_C_SIZE = ((tmpreg & 0x03) << 10) | (u16)(reg4b >> 22); info->SD_C_SIZE_MULT = (u8)(reg4b >> 7) & 0x07; - if (info->SD_Status.HiCapacity && info->SD_Status.IsMMC) + if ((info->SD_Status & SD_HiCapacity) && (info->SD_Status & 
SD_IsMMC)) info->HC_C_SIZE = *(u32 *)(&buf[0x100]); if (info->SD_READ_BL_LEN > SD_BLOCK_LEN) { @@ -2076,6 +2073,7 @@ static int ene_ms_init(struct us_data *us) u16 MSP_BlockSize, MSP_UserAreaBlocks; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; u8 *bbuf = info->bbuf; + unsigned int s; printk(KERN_INFO "transport --- ENE_MSInit\n"); @@ -2100,15 +2098,16 @@ static int ene_ms_init(struct us_data *us) return USB_STOR_TRANSPORT_ERROR; } /* the same part to test ENE */ - info->MS_Status = *(struct MS_STATUS *) bbuf; - - if (info->MS_Status.Insert && info->MS_Status.Ready) { - printk(KERN_INFO "Insert = %x\n", info->MS_Status.Insert); - printk(KERN_INFO "Ready = %x\n", info->MS_Status.Ready); - printk(KERN_INFO "IsMSPro = %x\n", info->MS_Status.IsMSPro); - printk(KERN_INFO "IsMSPHG = %x\n", info->MS_Status.IsMSPHG); - printk(KERN_INFO "WtP= %x\n", info->MS_Status.WtP); - if (info->MS_Status.IsMSPro) { + info->MS_Status = bbuf[0]; + + s = info->MS_Status; + if ((s & MS_Insert) && (s & MS_Ready)) { + printk(KERN_INFO "Insert = %x\n", !!(s & MS_Insert)); + printk(KERN_INFO "Ready = %x\n", !!(s & MS_Ready)); + printk(KERN_INFO "IsMSPro = %x\n", !!(s & MS_IsMSPro)); + printk(KERN_INFO "IsMSPHG = %x\n", !!(s & MS_IsMSPHG)); + printk(KERN_INFO "WtP= %x\n", !!(s & MS_WtP)); + if (s & MS_IsMSPro) { MSP_BlockSize = (bbuf[6] << 8) | bbuf[7]; MSP_UserAreaBlocks = (bbuf[10] << 8) | bbuf[11]; info->MSP_TotalBlock = MSP_BlockSize * MSP_UserAreaBlocks; @@ -2169,17 +2168,17 @@ static int ene_sd_init(struct us_data *us) return USB_STOR_TRANSPORT_ERROR; } - info->SD_Status = *(struct SD_STATUS *) bbuf; - if (info->SD_Status.Insert && info->SD_Status.Ready) { - struct SD_STATUS *s = &info->SD_Status; + info->SD_Status = bbuf[0]; + if ((info->SD_Status & SD_Insert) && (info->SD_Status & SD_Ready)) { + unsigned int s = info->SD_Status; ene_get_card_status(us, bbuf); - usb_stor_dbg(us, "Insert = %x\n", s->Insert); - usb_stor_dbg(us, "Ready = %x\n", s->Ready); - usb_stor_dbg(us, "IsMMC = %x\n", s->IsMMC); - usb_stor_dbg(us, "HiCapacity = %x\n", s->HiCapacity); - usb_stor_dbg(us, "HiSpeed = %x\n", s->HiSpeed); - usb_stor_dbg(us, "WtP = %x\n", s->WtP); + usb_stor_dbg(us, "Insert = %x\n", !!(s & SD_Insert)); + usb_stor_dbg(us, "Ready = %x\n", !!(s & SD_Ready)); + usb_stor_dbg(us, "IsMMC = %x\n", !!(s & SD_IsMMC)); + usb_stor_dbg(us, "HiCapacity = %x\n", !!(s & SD_HiCapacity)); + usb_stor_dbg(us, "HiSpeed = %x\n", !!(s & SD_HiSpeed)); + usb_stor_dbg(us, "WtP = %x\n", !!(s & SD_WtP)); } else { usb_stor_dbg(us, "SD Card Not Ready --- %x\n", bbuf[0]); return USB_STOR_TRANSPORT_ERROR; @@ -2201,14 +2200,14 @@ static int ene_init(struct us_data *us) misc_reg03 = bbuf[0]; if (misc_reg03 & 0x01) { - if (!info->SD_Status.Ready) { + if (!(info->SD_Status & SD_Ready)) { result = ene_sd_init(us); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; } } if (misc_reg03 & 0x02) { - if (!info->MS_Status.Ready) { + if (!(info->MS_Status & MS_Ready)) { result = ene_ms_init(us); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; @@ -2307,14 +2306,14 @@ static int ene_transport(struct scsi_cmnd *srb, struct us_data *us) /*US_DEBUG(usb_stor_show_command(us, srb)); */ scsi_set_resid(srb, 0); - if (unlikely(!(info->SD_Status.Ready || info->MS_Status.Ready))) + if (unlikely(!(info->SD_Status & SD_Ready) || (info->MS_Status & MS_Ready))) result = ene_init(us); if (result == USB_STOR_XFER_GOOD) { result = USB_STOR_TRANSPORT_ERROR; - if (info->SD_Status.Ready) + if (info->SD_Status & SD_Ready) 
result = sd_scsi_irp(us, srb); - if (info->MS_Status.Ready) + if (info->MS_Status & MS_Ready) result = ms_scsi_irp(us, srb); } return result; @@ -2378,7 +2377,6 @@ static int ene_ub6250_probe(struct usb_interface *intf, static int ene_ub6250_resume(struct usb_interface *iface) { - u8 tmp = 0; struct us_data *us = usb_get_intfdata(iface); struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); @@ -2390,17 +2388,16 @@ static int ene_ub6250_resume(struct usb_interface *iface) mutex_unlock(&us->dev_mutex); info->Power_IsResum = true; - /*info->SD_Status.Ready = 0; */ - info->SD_Status = *(struct SD_STATUS *)&tmp; - info->MS_Status = *(struct MS_STATUS *)&tmp; - info->SM_Status = *(struct SM_STATUS *)&tmp; + /* info->SD_Status &= ~SD_Ready; */ + info->SD_Status = 0; + info->MS_Status = 0; + info->SM_Status = 0; return 0; } static int ene_ub6250_reset_resume(struct usb_interface *iface) { - u8 tmp = 0; struct us_data *us = usb_get_intfdata(iface); struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); @@ -2412,10 +2409,10 @@ static int ene_ub6250_reset_resume(struct usb_interface *iface) * the device */ info->Power_IsResum = true; - /*info->SD_Status.Ready = 0; */ - info->SD_Status = *(struct SD_STATUS *)&tmp; - info->MS_Status = *(struct MS_STATUS *)&tmp; - info->SM_Status = *(struct SM_STATUS *)&tmp; + /* info->SD_Status &= ~SD_Ready; */ + info->SD_Status = 0; + info->MS_Status = 0; + info->SM_Status = 0; return 0; } diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 3789698d9d3c6..0c423916d7bfa 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -365,7 +365,7 @@ static int rts51x_read_mem(struct us_data *us, u16 addr, u8 *data, u16 len) buf = kmalloc(len, GFP_NOIO); if (buf == NULL) - return USB_STOR_TRANSPORT_ERROR; + return -ENOMEM; usb_stor_dbg(us, "addr = 0x%x, len = %d\n", addr, len); diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 7ffcda94d323a..16b4560216ba6 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -256,6 +256,10 @@ static int tps6598x_connect(struct tps6598x *tps, u32 status) typec_set_pwr_opmode(tps->port, mode); typec_set_pwr_role(tps->port, TPS_STATUS_TO_TYPEC_PORTROLE(status)); typec_set_vconn_role(tps->port, TPS_STATUS_TO_TYPEC_VCONN(status)); + if (TPS_STATUS_TO_UPSIDE_DOWN(status)) + typec_set_orientation(tps->port, TYPEC_ORIENTATION_REVERSE); + else + typec_set_orientation(tps->port, TYPEC_ORIENTATION_NORMAL); tps6598x_set_data_role(tps, TPS_STATUS_TO_TYPEC_DATAROLE(status), true); tps->partner = typec_register_partner(tps->port, &desc); @@ -278,6 +282,7 @@ static void tps6598x_disconnect(struct tps6598x *tps, u32 status) typec_set_pwr_opmode(tps->port, TYPEC_PWR_MODE_USB); typec_set_pwr_role(tps->port, TPS_STATUS_TO_TYPEC_PORTROLE(status)); typec_set_vconn_role(tps->port, TPS_STATUS_TO_TYPEC_VCONN(status)); + typec_set_orientation(tps->port, TYPEC_ORIENTATION_NONE); tps6598x_set_data_role(tps, TPS_STATUS_TO_TYPEC_DATAROLE(status), false); power_supply_changed(tps->psy); diff --git a/drivers/usb/typec/tipd/tps6598x.h b/drivers/usb/typec/tipd/tps6598x.h index 3dae84c524fb5..527857549d699 100644 --- a/drivers/usb/typec/tipd/tps6598x.h +++ b/drivers/usb/typec/tipd/tps6598x.h @@ -17,6 +17,7 @@ /* TPS_REG_STATUS bits */ #define TPS_STATUS_PLUG_PRESENT BIT(0) #define TPS_STATUS_PLUG_UPSIDE_DOWN BIT(4) +#define TPS_STATUS_TO_UPSIDE_DOWN(s) (!!((s) & TPS_STATUS_PLUG_UPSIDE_DOWN)) #define TPS_STATUS_PORTROLE 
BIT(5) #define TPS_STATUS_TO_TYPEC_PORTROLE(s) (!!((s) & TPS_STATUS_PORTROLE)) #define TPS_STATUS_DATAROLE BIT(6) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index d0f91078600e9..9fe1071a96444 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1669,7 +1669,7 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) return; if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { - if (!mvdev->cvq.ready) + if (!mvdev->wq || !mvdev->cvq.ready) return; wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); @@ -2707,9 +2707,12 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct workqueue_struct *wq; mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); - destroy_workqueue(mvdev->wq); + wq = mvdev->wq; + mvdev->wq = NULL; + destroy_workqueue(wq); _vdpa_unregister_device(dev); mgtdev->ndev = NULL; } diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index f948e6cd29939..2e6409cc11ada 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -228,6 +228,19 @@ int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t stat if (!ret) { /* D3 might be unsupported via quirk, skip unless in D3 */ if (needs_save && pdev->current_state >= PCI_D3hot) { + /* + * The current PCI state will be saved locally in + * 'pm_save' during the D3hot transition. When the + * device state is changed to D0 again with the current + * function, then pci_load_and_free_saved_state() will restore + * the state and will free the memory pointed to by + * 'pm_save'. There are a few cases where the PCI power + * state can be changed to D0 without the involvement + * of the driver. For these cases, free the earlier + * allocated memory first before overwriting 'pm_save' + * to prevent a memory leak. + */ + kfree(vdev->pm_save); vdev->pm_save = pci_store_saved_state(pdev); } else if (needs_restore) { pci_load_and_free_saved_state(pdev, &vdev->pm_save); @@ -322,6 +335,17 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) /* For needs_reset */ lockdep_assert_held(&vdev->vdev.dev_set->lock); + /* + * This function can be invoked while the power state is non-D0. + * It calls __pci_reset_function_locked(), which internally + * can use pci_pm_reset() for the function reset. pci_pm_reset() will + * fail if the power state is non-D0. Also, for the devices which + * have NoSoftRst-, the reset function can cause the PCI config space + * reset without restoring the original state (saved locally in + * 'vdev->pm_save'). + */ + vfio_pci_set_power_state(vdev, PCI_D0); + /* Stop the device from further DMA */ pci_clear_master(pdev); @@ -921,6 +945,19 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, return -EINVAL; vfio_pci_zap_and_down_write_memory_lock(vdev); + + /* + * This function can be invoked while the power state is non-D0. + * If pci_try_reset_function() has been called while the power + * state is non-D0, then pci_try_reset_function() will + * internally set the power state to D0 without vfio driver + * involvement. For the devices which have NoSoftRst-, the + * reset function can cause the PCI config space reset without + * restoring the original state (saved locally in + * 'vdev->pm_save').
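The ownership rule behind the added kfree() is simple: 'pm_save' holds at most one saved-state buffer, and whichever path installs a new one must release the old one first. A freestanding sketch of that rule, with save_state() as a hypothetical allocator in place of pci_store_saved_state():

    #include <stdlib.h>

    struct saved_state { char regs[256]; };

    /* Hypothetical allocator standing in for pci_store_saved_state(). */
    static struct saved_state *save_state(void)
    {
        return calloc(1, sizeof(struct saved_state));
    }

    static struct saved_state *pm_save;     /* at most one buffer live */

    static void enter_d3(void)
    {
        /*
         * A buffer may still be held from an earlier D3 transition if
         * the device reached D0 without the driver restoring it; free
         * it before overwriting the pointer, or it leaks.
         */
        free(pm_save);
        pm_save = save_state();
    }

    static void enter_d0(void)
    {
        /* Restoring would consume the buffer; here it is just freed. */
        free(pm_save);
        pm_save = NULL;
    }

    int main(void)
    {
        enter_d3();
        enter_d3();     /* no leak: the stale buffer was freed first */
        enter_d0();
        return 0;
    }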
+ */ + vfio_pci_set_power_state(vdev, PCI_D0); + ret = pci_try_reset_function(vdev->pdev); up_write(&vdev->memory_lock); @@ -2055,6 +2092,18 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, } cur_mem = NULL; + /* + * pci_reset_bus() will reset all the devices on the bus. + * The power state can be non-D0 for some of the devices on the bus. + * For these devices, pci_reset_bus() will internally set + * the power state to D0 without vfio driver involvement. + * For the devices which have NoSoftRst-, the reset function can + * cause the PCI config space reset without restoring the original + * state (saved locally in 'vdev->pm_save'). + */ + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) + vfio_pci_set_power_state(cur, PCI_D0); + ret = pci_reset_bus(pdev); err_undo: @@ -2108,6 +2157,18 @@ static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set) if (!pdev) return false; + /* + * pci_reset_bus() will reset all the devices on the bus. + * The power state can be non-D0 for some of the devices on the bus. + * For these devices, pci_reset_bus() will internally set + * the power state to D0 without vfio driver involvement. + * For the devices which have NoSoftRst-, the reset function can + * cause the PCI config space reset without restoring the original + * state (saved locally in 'vdev->pm_save'). + */ + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) + vfio_pci_set_power_state(cur, PCI_D0); + ret = pci_reset_bus(pdev); if (ret) return false; diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c index 40b098320b2a7..5829cf2d0552d 100644 --- a/drivers/vhost/iotlb.c +++ b/drivers/vhost/iotlb.c @@ -62,8 +62,12 @@ int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, */ if (start == 0 && last == ULONG_MAX) { u64 mid = last / 2; + int err = vhost_iotlb_add_range_ctx(iotlb, start, mid, addr, + perm, opaque); + + if (err) + return err; - vhost_iotlb_add_range_ctx(iotlb, start, mid, addr, perm, opaque); addr += mid + 1; start = mid + 1; } diff --git a/drivers/video/fbdev/atafb.c b/drivers/video/fbdev/atafb.c index e3812a8ff55a4..29e650ecfceb1 100644 --- a/drivers/video/fbdev/atafb.c +++ b/drivers/video/fbdev/atafb.c @@ -1683,9 +1683,9 @@ static int falcon_setcolreg(unsigned int regno, unsigned int red, ((blue & 0xfc00) >> 8)); if (regno < 16) { shifter_tt.color_reg[regno] = - (((red & 0xe000) >> 13) | ((red & 0x1000) >> 12) << 8) | - (((green & 0xe000) >> 13) | ((green & 0x1000) >> 12) << 4) | - ((blue & 0xe000) >> 13) | ((blue & 0x1000) >> 12); + ((((red & 0xe000) >> 13) | ((red & 0x1000) >> 12)) << 8) | + ((((green & 0xe000) >> 13) | ((green & 0x1000) >> 12)) << 4) | + ((blue & 0xe000) >> 13) | ((blue & 0x1000) >> 12); ((u32 *)info->pseudo_palette)[regno] = ((red & 0xf800) | ((green & 0xfc00) >> 5) | ((blue & 0xf800) >> 11)); @@ -1971,9 +1971,9 @@ static int stste_setcolreg(unsigned int regno, unsigned int red, green >>= 12; if (ATARIHW_PRESENT(EXTD_SHIFTER)) shifter_tt.color_reg[regno] = - (((red & 0xe) >> 1) | ((red & 1) << 3) << 8) | - (((green & 0xe) >> 1) | ((green & 1) << 3) << 4) | - ((blue & 0xe) >> 1) | ((blue & 1) << 3); + ((((red & 0xe) >> 1) | ((red & 1) << 3)) << 8) | + ((((green & 0xe) >> 1) | ((green & 1) << 3)) << 4) | + ((blue & 0xe) >> 1) | ((blue & 1) << 3); else shifter_tt.color_reg[regno] = ((red & 0xe) << 7) | diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c index 355b6120dc4f0..1fc8de4ecbebf 100644 --- a/drivers/video/fbdev/atmel_lcdfb.c +++
b/drivers/video/fbdev/atmel_lcdfb.c @@ -1062,15 +1062,16 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev) INIT_LIST_HEAD(&info->modelist); - if (pdev->dev.of_node) { - ret = atmel_lcdfb_of_init(sinfo); - if (ret) - goto free_info; - } else { + if (!pdev->dev.of_node) { dev_err(dev, "cannot get default configuration\n"); goto free_info; } + ret = atmel_lcdfb_of_init(sinfo); + if (ret) + goto free_info; + + ret = -ENODEV; if (!sinfo->config) goto free_info; diff --git a/drivers/video/fbdev/cirrusfb.c b/drivers/video/fbdev/cirrusfb.c index 93802abbbc72a..3d47c347b8970 100644 --- a/drivers/video/fbdev/cirrusfb.c +++ b/drivers/video/fbdev/cirrusfb.c @@ -469,7 +469,7 @@ static int cirrusfb_check_mclk(struct fb_info *info, long freq) return 0; } -static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var, +static int cirrusfb_check_pixclock(struct fb_var_screeninfo *var, struct fb_info *info) { long freq; @@ -478,9 +478,7 @@ static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var, unsigned maxclockidx = var->bits_per_pixel >> 3; /* convert from ps to kHz */ - freq = PICOS2KHZ(var->pixclock); - - dev_dbg(info->device, "desired pixclock: %ld kHz\n", freq); + freq = PICOS2KHZ(var->pixclock ? : 1); maxclock = cirrusfb_board_info[cinfo->btype].maxclock[maxclockidx]; cinfo->multiplexing = 0; @@ -488,11 +486,13 @@ static int cirrusfb_check_pixclock(const struct fb_var_screeninfo *var, /* If the frequency is greater than we can support, we might be able * to use multiplexing for the video mode */ if (freq > maxclock) { - dev_err(info->device, - "Frequency greater than maxclock (%ld kHz)\n", - maxclock); - return -EINVAL; + var->pixclock = KHZ2PICOS(maxclock); + + while ((freq = PICOS2KHZ(var->pixclock)) > maxclock) + var->pixclock++; } + dev_dbg(info->device, "desired pixclock: %ld kHz\n", freq); + /* * Additional constraint: 8bpp uses DAC clock doubling to allow maximum * pixel clock diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c index 509311471d515..bd59e7b11ed53 100644 --- a/drivers/video/fbdev/controlfb.c +++ b/drivers/video/fbdev/controlfb.c @@ -67,7 +67,9 @@ #define out_8(addr, val) (void)(val) #define in_le32(addr) 0 #define out_le32(addr, val) (void)(val) +#ifndef pgprot_cached_wthru #define pgprot_cached_wthru(prot) (prot) +#endif #else static void invalid_vram_cache(void __force *addr) { diff --git a/drivers/video/fbdev/core/fbcvt.c b/drivers/video/fbdev/core/fbcvt.c index 55d2bd0ce5c02..64843464c6613 100644 --- a/drivers/video/fbdev/core/fbcvt.c +++ b/drivers/video/fbdev/core/fbcvt.c @@ -214,9 +214,11 @@ static u32 fb_cvt_aspect_ratio(struct fb_cvt_data *cvt) static void fb_cvt_print_name(struct fb_cvt_data *cvt) { u32 pixcount, pixcount_mod; - int cnt = 255, offset = 0, read = 0; - u8 *buf = kzalloc(256, GFP_KERNEL); + int size = 256; + int off = 0; + u8 *buf; + buf = kzalloc(size, GFP_KERNEL); if (!buf) return; @@ -224,43 +226,30 @@ static void fb_cvt_print_name(struct fb_cvt_data *cvt) pixcount_mod = (cvt->xres * (cvt->yres/cvt->interlace)) % 1000000; pixcount_mod /= 1000; - read = snprintf(buf+offset, cnt, "fbcvt: %dx%d@%d: CVT Name - ", - cvt->xres, cvt->yres, cvt->refresh); - offset += read; - cnt -= read; + off += scnprintf(buf + off, size - off, "fbcvt: %dx%d@%d: CVT Name - ", + cvt->xres, cvt->yres, cvt->refresh); - if (cvt->status) - snprintf(buf+offset, cnt, "Not a CVT standard - %d.%03d Mega " - "Pixel Image\n", pixcount, pixcount_mod); - else { - if (pixcount) { - read = snprintf(buf+offset, cnt, "%d", 
pixcount); - cnt -= read; - offset += read; - } + if (cvt->status) { + off += scnprintf(buf + off, size - off, + "Not a CVT standard - %d.%03d Mega Pixel Image\n", + pixcount, pixcount_mod); + } else { + if (pixcount) + off += scnprintf(buf + off, size - off, "%d", pixcount); - read = snprintf(buf+offset, cnt, ".%03dM", pixcount_mod); - cnt -= read; - offset += read; + off += scnprintf(buf + off, size - off, ".%03dM", pixcount_mod); if (cvt->aspect_ratio == 0) - read = snprintf(buf+offset, cnt, "3"); + off += scnprintf(buf + off, size - off, "3"); else if (cvt->aspect_ratio == 3) - read = snprintf(buf+offset, cnt, "4"); + off += scnprintf(buf + off, size - off, "4"); else if (cvt->aspect_ratio == 1 || cvt->aspect_ratio == 4) - read = snprintf(buf+offset, cnt, "9"); + off += scnprintf(buf + off, size - off, "9"); else if (cvt->aspect_ratio == 2) - read = snprintf(buf+offset, cnt, "A"); - else - read = 0; - cnt -= read; - offset += read; - - if (cvt->flags & FB_CVT_FLAG_REDUCED_BLANK) { - read = snprintf(buf+offset, cnt, "-R"); - cnt -= read; - offset += read; - } + off += scnprintf(buf + off, size - off, "A"); + + if (cvt->flags & FB_CVT_FLAG_REDUCED_BLANK) + off += scnprintf(buf + off, size - off, "-R"); } printk(KERN_INFO "%s\n", buf); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 13083ad8d7515..ad9aac06427a0 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1559,18 +1560,36 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a, /* check all firmware fbs and kick out if the base addr overlaps */ for_each_registered_fb(i) { struct apertures_struct *gen_aper; + struct device *device; if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE)) continue; gen_aper = registered_fb[i]->apertures; + device = registered_fb[i]->device; if (fb_do_apertures_overlap(gen_aper, a) || (primary && gen_aper && gen_aper->count && gen_aper->ranges[0].base == VGA_FB_PHYS)) { printk(KERN_INFO "fb%d: switching to %s from %s\n", i, name, registered_fb[i]->fix.id); - do_unregister_framebuffer(registered_fb[i]); + + /* + * If we kick out a firmware driver, we also want to remove + * the underlying platform device, such as simple-framebuffer, + * VESA, EFI, etc. A native driver will then be able to + * allocate the memory range. + * + * If it's not a platform device, at least print a warning. A + * fix would add code to remove the device from the system.
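The trigger for this whole path is fb_do_apertures_overlap(), which reduces to interval intersection over the two drivers' memory apertures. A sketch of that test, with struct range as a local stand-in for the aperture entries:

    #include <stdbool.h>
    #include <stdio.h>

    struct range { unsigned long base, size; };

    /*
     * Two half-open ranges [base, base + size) intersect iff each one
     * starts before the other one ends.
     */
    static bool ranges_overlap(const struct range *a, const struct range *b)
    {
        return a->base < b->base + b->size &&
               b->base < a->base + a->size;
    }

    int main(void)
    {
        struct range fw = { 0xe0000000, 0x800000 };   /* firmware fb */
        struct range hw = { 0xe0400000, 0x400000 };   /* native driver */

        printf("overlap: %d\n", ranges_overlap(&fw, &hw));
        return 0;
    }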
+ */ + if (dev_is_platform(device)) { + registered_fb[i]->forced_out = true; + platform_device_unregister(to_platform_device(device)); + } else { + pr_warn("fb%d: cannot remove device\n", i); + do_unregister_framebuffer(registered_fb[i]); + } } } } @@ -1900,9 +1919,13 @@ EXPORT_SYMBOL(register_framebuffer); void unregister_framebuffer(struct fb_info *fb_info) { - mutex_lock(®istration_lock); + bool forced_out = fb_info->forced_out; + + if (!forced_out) + mutex_lock(®istration_lock); do_unregister_framebuffer(fb_info); - mutex_unlock(®istration_lock); + if (!forced_out) + mutex_unlock(®istration_lock); } EXPORT_SYMBOL(unregister_framebuffer); diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c index 5c82611e93d99..236521b19daf7 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.c +++ b/drivers/video/fbdev/matrox/matroxfb_base.c @@ -1377,7 +1377,7 @@ static struct video_board vbG200 = { .lowlevel = &matrox_G100 }; static struct video_board vbG200eW = { - .maxvram = 0x800000, + .maxvram = 0x100000, .maxdisplayable = 0x800000, .accelID = FB_ACCEL_MATROX_MGAG200, .lowlevel = &matrox_G100 diff --git a/drivers/video/fbdev/nvidia/nv_i2c.c b/drivers/video/fbdev/nvidia/nv_i2c.c index d7994a1732459..0b48965a6420c 100644 --- a/drivers/video/fbdev/nvidia/nv_i2c.c +++ b/drivers/video/fbdev/nvidia/nv_i2c.c @@ -86,7 +86,7 @@ static int nvidia_setup_i2c_bus(struct nvidia_i2c_chan *chan, const char *name, { int rc; - strcpy(chan->adapter.name, name); + strscpy(chan->adapter.name, name, sizeof(chan->adapter.name)); chan->adapter.owner = THIS_MODULE; chan->adapter.class = i2c_class; chan->adapter.algo_data = &chan->algo; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c index 2fa436475b406..c8ad3ef42bd31 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c @@ -246,6 +246,7 @@ static int dvic_probe_of(struct platform_device *pdev) adapter_node = of_parse_phandle(node, "ddc-i2c-bus", 0); if (adapter_node) { adapter = of_get_i2c_adapter_by_node(adapter_node); + of_node_put(adapter_node); if (adapter == NULL) { dev_err(&pdev->dev, "failed to parse ddc-i2c-bus\n"); omap_dss_put_device(ddata->in); diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index 4b0793abdd84b..a2c7c5cb15234 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -409,7 +409,7 @@ static ssize_t dsicm_num_errors_show(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%d\n", errors); + return sysfs_emit(buf, "%d\n", errors); } static ssize_t dsicm_hw_revision_show(struct device *dev, @@ -439,7 +439,7 @@ static ssize_t dsicm_hw_revision_show(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%02x.%02x.%02x\n", id1, id2, id3); + return sysfs_emit(buf, "%02x.%02x.%02x\n", id1, id2, id3); } static ssize_t dsicm_store_ulps(struct device *dev, @@ -487,7 +487,7 @@ static ssize_t dsicm_show_ulps(struct device *dev, t = ddata->ulps_enabled; mutex_unlock(&ddata->lock); - return snprintf(buf, PAGE_SIZE, "%u\n", t); + return sysfs_emit(buf, "%u\n", t); } static ssize_t dsicm_store_ulps_timeout(struct device *dev, @@ -532,7 +532,7 @@ static ssize_t dsicm_show_ulps_timeout(struct device *dev, t = ddata->ulps_timeout; mutex_unlock(&ddata->lock); - 
return snprintf(buf, PAGE_SIZE, "%u\n", t); + return sysfs_emit(buf, "%u\n", t); } static DEVICE_ATTR(num_dsi_errors, S_IRUGO, dsicm_num_errors_show, NULL); diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c index 8d8b5ff7d43c8..3696eb09b69b4 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c @@ -476,7 +476,7 @@ static ssize_t show_cabc_available_modes(struct device *dev, int i; if (!ddata->has_cabc) - return snprintf(buf, PAGE_SIZE, "%s\n", cabc_modes[0]); + return sysfs_emit(buf, "%s\n", cabc_modes[0]); for (i = 0, len = 0; len < PAGE_SIZE && i < ARRAY_SIZE(cabc_modes); i++) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c index afac1d9445aa2..57b7d1f490962 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c @@ -169,7 +169,7 @@ static ssize_t tpo_td043_vmirror_show(struct device *dev, { struct panel_drv_data *ddata = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", ddata->vmirror); + return sysfs_emit(buf, "%d\n", ddata->vmirror); } static ssize_t tpo_td043_vmirror_store(struct device *dev, @@ -199,7 +199,7 @@ static ssize_t tpo_td043_mode_show(struct device *dev, { struct panel_drv_data *ddata = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", ddata->mode); + return sysfs_emit(buf, "%d\n", ddata->mode); } static ssize_t tpo_td043_mode_store(struct device *dev, diff --git a/drivers/video/fbdev/sm712fb.c b/drivers/video/fbdev/sm712fb.c index 0dbc6bf8268ac..092a1caa1208e 100644 --- a/drivers/video/fbdev/sm712fb.c +++ b/drivers/video/fbdev/sm712fb.c @@ -1047,7 +1047,7 @@ static ssize_t smtcfb_read(struct fb_info *info, char __user *buf, if (count + p > total_size) count = total_size - p; - buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, GFP_KERNEL); + buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -1059,25 +1059,14 @@ static ssize_t smtcfb_read(struct fb_info *info, char __user *buf, while (count) { c = (count > PAGE_SIZE) ? PAGE_SIZE : count; dst = buffer; - for (i = c >> 2; i--;) { - *dst = fb_readl(src++); - *dst = big_swap(*dst); + for (i = (c + 3) >> 2; i--;) { + u32 val; + + val = fb_readl(src); + *dst = big_swap(val); + src++; dst++; } - if (c & 3) { - u8 *dst8 = (u8 *)dst; - u8 __iomem *src8 = (u8 __iomem *)src; - - for (i = c & 3; i--;) { - if (i & 1) { - *dst8++ = fb_readb(++src8); - } else { - *dst8++ = fb_readb(--src8); - src8 += 2; - } - } - src = (u32 __iomem *)src8; - } if (copy_to_user(buf, buffer, c)) { err = -EFAULT; @@ -1130,7 +1119,7 @@ static ssize_t smtcfb_write(struct fb_info *info, const char __user *buf, count = total_size - p; } - buffer = kmalloc((count > PAGE_SIZE) ? 
PAGE_SIZE : count, GFP_KERNEL); + buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -1148,24 +1137,11 @@ static ssize_t smtcfb_write(struct fb_info *info, const char __user *buf, break; } - for (i = c >> 2; i--;) { - fb_writel(big_swap(*src), dst++); + for (i = (c + 3) >> 2; i--;) { + fb_writel(big_swap(*src), dst); + dst++; src++; } - if (c & 3) { - u8 *src8 = (u8 *)src; - u8 __iomem *dst8 = (u8 __iomem *)dst; - - for (i = c & 3; i--;) { - if (i & 1) { - fb_writeb(*src8++, ++dst8); - } else { - fb_writeb(*src8++, --dst8); - dst8 += 2; - } - } - dst = (u32 __iomem *)dst8; - } *ppos += c; buf += c; diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c index bfac3ee4a6422..28768c272b73d 100644 --- a/drivers/video/fbdev/smscufx.c +++ b/drivers/video/fbdev/smscufx.c @@ -1656,6 +1656,7 @@ static int ufx_usb_probe(struct usb_interface *interface, info->par = dev; info->pseudo_palette = dev->pseudo_palette; info->fbops = &ufx_ops; + INIT_LIST_HEAD(&info->modelist); retval = fb_alloc_cmap(&info->cmap, 256, 0); if (retval < 0) { @@ -1666,8 +1667,6 @@ static int ufx_usb_probe(struct usb_interface *interface, INIT_DELAYED_WORK(&dev->free_framebuffer_work, ufx_free_framebuffer_work); - INIT_LIST_HEAD(&info->modelist); - retval = ufx_reg_read(dev, 0x3000, &id_rev); check_warn_goto_error(retval, "error %d reading 0x3000 register from device", retval); dev_dbg(dev->gdev, "ID_REV register value 0x%08x", id_rev); diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index b9cdd02c10009..90f48b71fd8f7 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -1426,7 +1426,7 @@ static ssize_t metrics_bytes_rendered_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->bytes_rendered)); } @@ -1434,7 +1434,7 @@ static ssize_t metrics_bytes_identical_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->bytes_identical)); } @@ -1442,7 +1442,7 @@ static ssize_t metrics_bytes_sent_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->bytes_sent)); } @@ -1450,7 +1450,7 @@ static ssize_t metrics_cpu_kcycles_used_show(struct device *fbdev, struct device_attribute *a, char *buf) { struct fb_info *fb_info = dev_get_drvdata(fbdev); struct dlfb_data *dlfb = fb_info->par; - return snprintf(buf, PAGE_SIZE, "%u\n", + return sysfs_emit(buf, "%u\n", atomic_read(&dlfb->cpu_kcycles_used)); } diff --git a/drivers/video/fbdev/w100fb.c b/drivers/video/fbdev/w100fb.c index d96ab28f8ce4a..4e641a780726e 100644 --- a/drivers/video/fbdev/w100fb.c +++ b/drivers/video/fbdev/w100fb.c @@ -770,12 +770,18 @@ static int w100fb_probe(struct platform_device *pdev) fb_dealloc_cmap(&info->cmap); kfree(info->pseudo_palette); } - if (remapped_fbuf != NULL) + if (remapped_fbuf != NULL) { iounmap(remapped_fbuf); - if (remapped_regs != NULL) + remapped_fbuf = NULL; + } + if (remapped_regs != NULL) { iounmap(remapped_regs); - if (remapped_base != NULL) + remapped_regs = NULL; + } + if 
(remapped_base != NULL) { iounmap(remapped_base); + remapped_base = NULL; + } if (info) framebuffer_release(info); return err; @@ -795,8 +801,11 @@ static int w100fb_remove(struct platform_device *pdev) fb_dealloc_cmap(&info->cmap); iounmap(remapped_base); + remapped_base = NULL; iounmap(remapped_regs); + remapped_regs = NULL; iounmap(remapped_fbuf); + remapped_fbuf = NULL; framebuffer_release(info); diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c index 5419794fccf1e..423ea888d79af 100644 --- a/drivers/virt/acrn/hsm.c +++ b/drivers/virt/acrn/hsm.c @@ -136,8 +136,10 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd, if (IS_ERR(vm_param)) return PTR_ERR(vm_param); - if ((vm_param->reserved0 | vm_param->reserved1) != 0) + if ((vm_param->reserved0 | vm_param->reserved1) != 0) { + kfree(vm_param); return -EINVAL; + } vm = acrn_vm_create(vm, vm_param); if (!vm) { @@ -182,21 +184,29 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd, return PTR_ERR(cpu_regs); for (i = 0; i < ARRAY_SIZE(cpu_regs->reserved); i++) - if (cpu_regs->reserved[i]) + if (cpu_regs->reserved[i]) { + kfree(cpu_regs); return -EINVAL; + } for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.reserved_32); i++) - if (cpu_regs->vcpu_regs.reserved_32[i]) + if (cpu_regs->vcpu_regs.reserved_32[i]) { + kfree(cpu_regs); return -EINVAL; + } for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.reserved_64); i++) - if (cpu_regs->vcpu_regs.reserved_64[i]) + if (cpu_regs->vcpu_regs.reserved_64[i]) { + kfree(cpu_regs); return -EINVAL; + } for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.gdt.reserved); i++) if (cpu_regs->vcpu_regs.gdt.reserved[i] | - cpu_regs->vcpu_regs.idt.reserved[i]) + cpu_regs->vcpu_regs.idt.reserved[i]) { + kfree(cpu_regs); return -EINVAL; + } ret = hcall_set_vcpu_regs(vm->vmid, virt_to_phys(cpu_regs)); if (ret < 0) diff --git a/drivers/virt/acrn/mm.c b/drivers/virt/acrn/mm.c index c4f2e15c8a2ba..3b1b1e7a844b4 100644 --- a/drivers/virt/acrn/mm.c +++ b/drivers/virt/acrn/mm.c @@ -162,10 +162,34 @@ int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap) void *remap_vaddr; int ret, pinned; u64 user_vm_pa; + unsigned long pfn; + struct vm_area_struct *vma; if (!vm || !memmap) return -EINVAL; + mmap_read_lock(current->mm); + vma = vma_lookup(current->mm, memmap->vma_base); + if (vma && ((vma->vm_flags & VM_PFNMAP) != 0)) { + if ((memmap->vma_base + memmap->len) > vma->vm_end) { + mmap_read_unlock(current->mm); + return -EINVAL; + } + + ret = follow_pfn(vma, memmap->vma_base, &pfn); + mmap_read_unlock(current->mm); + if (ret < 0) { + dev_dbg(acrn_dev.this_device, + "Failed to lookup PFN at VMA:%pK.\n", (void *)memmap->vma_base); + return ret; + } + + return acrn_mm_region_add(vm, memmap->user_vm_pa, + PFN_PHYS(pfn), memmap->len, + ACRN_MEM_TYPE_WB, memmap->attr); + } + mmap_read_unlock(current->mm); + /* Get the page number of the map region */ nr_pages = memmap->len >> PAGE_SHIFT; pages = vzalloc(nr_pages * sizeof(struct page *)); diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 22f15f444f757..75c8d560bbd36 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -526,8 +526,9 @@ int virtio_device_restore(struct virtio_device *dev) goto err; } - /* Finally, tell the device we're all set */ - virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + /* If restore didn't do it, mark device DRIVER_OK ourselves. 
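virtio_device_ready() latches VIRTIO_CONFIG_S_DRIVER_OK, so the guard here only needs to check whether the transport's restore hook already set the bit. The check-then-set reduces to the following sketch, with get_status()/set_status() as stand-ins for the config ops:

    #include <stdio.h>

    #define DRIVER_OK 0x04          /* VIRTIO_CONFIG_S_DRIVER_OK */

    static unsigned char status;    /* stands in for the status byte */

    static unsigned char get_status(void) { return status; }
    static void set_status(unsigned char s) { status = s; }

    /* Mark the device live only if restore has not already done so. */
    static void finish_restore(void)
    {
        if (!(get_status() & DRIVER_OK))
            set_status(get_status() | DRIVER_OK);
    }

    int main(void)
    {
        finish_restore();
        finish_restore();           /* second call is a no-op */
        printf("status=0x%02x\n", status);
        return 0;
    }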
*/ + if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) + virtio_device_ready(dev); virtio_config_enable(dev); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index fdbde1db5ec59..d724f676608ba 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -24,46 +24,17 @@ MODULE_PARM_DESC(force_legacy, "Force legacy mode for transitional virtio 1 devices"); #endif -/* disable irq handlers */ -void vp_disable_cbs(struct virtio_device *vdev) +/* wait for pending irq handlers */ +void vp_synchronize_vectors(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) { - /* - * The below synchronize() guarantees that any - * interrupt for this line arriving after - * synchronize_irq() has completed is guaranteed to see - * intx_soft_enabled == false. - */ - WRITE_ONCE(vp_dev->intx_soft_enabled, false); + if (vp_dev->intx_enabled) synchronize_irq(vp_dev->pci_dev->irq); - } - - for (i = 0; i < vp_dev->msix_vectors; ++i) - disable_irq(pci_irq_vector(vp_dev->pci_dev, i)); -} - -/* enable irq handlers */ -void vp_enable_cbs(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - int i; - - if (vp_dev->intx_enabled) { - disable_irq(vp_dev->pci_dev->irq); - /* - * The above disable_irq() provides TSO ordering and - * as such promotes the below store to store-release. - */ - WRITE_ONCE(vp_dev->intx_soft_enabled, true); - enable_irq(vp_dev->pci_dev->irq); - return; - } for (i = 0; i < vp_dev->msix_vectors; ++i) - enable_irq(pci_irq_vector(vp_dev->pci_dev, i)); + synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } /* the notify function used when creating a virt queue */ @@ -113,9 +84,6 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) struct virtio_pci_device *vp_dev = opaque; u8 isr; - if (!READ_ONCE(vp_dev->intx_soft_enabled)) - return IRQ_NONE; - /* reading the ISR has the effect of also clearing it so it's very * important to save off the value. */ isr = ioread8(vp_dev->isr); @@ -173,8 +141,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_config_changed, IRQF_NO_AUTOEN, - vp_dev->msix_names[v], + vp_config_changed, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -193,8 +160,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_vring_interrupt, IRQF_NO_AUTOEN, - vp_dev->msix_names[v], + vp_vring_interrupt, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -371,7 +337,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, IRQF_NO_AUTOEN, + vring_interrupt, 0, vp_dev->msix_names[msix_vec], vqs[i]); if (err) diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 23f6c5c678d5e..eb17a29fc7ef1 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -63,7 +63,6 @@ struct virtio_pci_device { /* MSI-X support */ int msix_enabled; int intx_enabled; - bool intx_soft_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. 
This size should be enough, * and I'm too lazy to allocate each name separately. */ @@ -102,10 +101,8 @@ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) return container_of(vdev, struct virtio_pci_device, vdev); } -/* disable irq handlers */ -void vp_disable_cbs(struct virtio_device *vdev); -/* enable irq handlers */ -void vp_enable_cbs(struct virtio_device *vdev); +/* wait for pending irq handlers */ +void vp_synchronize_vectors(struct virtio_device *vdev); /* the notify function used when creating a virt queue */ bool vp_notify(struct virtqueue *vq); /* the config->del_vqs() implementation */ diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 34141b9abe278..6f4e34ce96b81 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -98,8 +98,8 @@ static void vp_reset(struct virtio_device *vdev) /* Flush out the status write, and flush in device writes, * including MSi-X interrupts, if any. */ vp_legacy_get_status(&vp_dev->ldev); - /* Disable VQ/configuration callbacks. */ - vp_disable_cbs(vdev); + /* Flush pending VQ/configuration callbacks. */ + vp_synchronize_vectors(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) @@ -185,7 +185,6 @@ static void del_vq(struct virtio_pci_vq_info *info) } static const struct virtio_config_ops virtio_pci_config_ops = { - .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .get_status = vp_get_status, diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 5455bc041fb69..30654d3a0b41e 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -172,8 +172,8 @@ static void vp_reset(struct virtio_device *vdev) */ while (vp_modern_get_status(mdev)) msleep(1); - /* Disable VQ/configuration callbacks. */ - vp_disable_cbs(vdev); + /* Flush pending VQ/configuration callbacks. 
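Waiting is sufficient at this point because the loop above already confirmed the reset completed and the device is silent: no new interrupts will arrive, so it is enough to let handlers that are mid-flight drain out, which is what synchronize_irq() provides per vector. A rough userspace analogue of that drain, with an atomic in-flight counter standing in for the irq core's bookkeeping:

    #include <stdatomic.h>
    #include <sched.h>

    static atomic_int in_flight;    /* handlers currently executing */

    static void handler_enter(void) { atomic_fetch_add(&in_flight, 1); }
    static void handler_exit(void)  { atomic_fetch_sub(&in_flight, 1); }

    /*
     * Wait for already-started handlers; the quiesced device raises
     * no new ones, so the counter can only fall.
     */
    static void synchronize(void)
    {
        while (atomic_load(&in_flight))
            sched_yield();
    }

    int main(void)
    {
        handler_enter();
        handler_exit();
        synchronize();              /* returns at once: none in flight */
        return 0;
    }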
*/ + vp_synchronize_vectors(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) @@ -380,7 +380,6 @@ static bool vp_get_shm_region(struct virtio_device *vdev, } static const struct virtio_config_ops virtio_pci_config_nodev_ops = { - .enable_cbs = vp_enable_cbs, .get = NULL, .set = NULL, .generation = vp_generation, @@ -398,7 +397,6 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { }; static const struct virtio_config_ops virtio_pci_config_ops = { - .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .generation = vp_generation, diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index 117bc2a8eb0a4..db843f8258602 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -228,6 +228,7 @@ static int rti_wdt_probe(struct platform_device *pdev) ret = pm_runtime_get_sync(dev); if (ret) { pm_runtime_put_noidle(dev); + pm_runtime_disable(&pdev->dev); return dev_err_probe(dev, ret, "runtime pm failed\n"); } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d61543fbd6528..11273b70271d1 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -170,8 +170,8 @@ static int padzero(unsigned long elf_bss) static int create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, - unsigned long load_addr, unsigned long interp_load_addr, - unsigned long e_entry) + unsigned long interp_load_addr, + unsigned long e_entry, unsigned long phdr_addr) { struct mm_struct *mm = current->mm; unsigned long p = bprm->p; @@ -257,7 +257,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP); NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE); NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC); - NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff); + NEW_AUX_ENT(AT_PHDR, phdr_addr); NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr)); NEW_AUX_ENT(AT_PHNUM, exec->e_phnum); NEW_AUX_ENT(AT_BASE, interp_load_addr); @@ -823,7 +823,7 @@ static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr, static int load_elf_binary(struct linux_binprm *bprm) { struct file *interpreter = NULL; /* to shut gcc up */ - unsigned long load_addr = 0, load_bias = 0; + unsigned long load_addr, load_bias = 0, phdr_addr = 0; int load_addr_set = 0; unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL; @@ -1180,6 +1180,17 @@ static int load_elf_binary(struct linux_binprm *bprm) reloc_func_desc = load_bias; } } + + /* + * Figure out which segment in the file contains the Program + * Header table, and map to the associated memory address. 
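The selection just below picks the PT_LOAD segment whose file extent [p_offset, p_offset + p_filesz) covers e_phoff, then translates that file offset into the segment's virtual range; load_bias is added afterwards for PIE binaries. A self-contained sketch of the computation, where struct seg is a pared-down stand-in for the ELF program header entry:

    #include <stdint.h>
    #include <stdio.h>

    struct seg { uint64_t p_offset, p_filesz, p_vaddr; };

    /*
     * Return the mapped address of the table at file offset e_phoff,
     * or 0 if no segment covers it.
     */
    static uint64_t phdr_addr(const struct seg *s, int n,
                              uint64_t e_phoff, uint64_t load_bias)
    {
        for (int i = 0; i < n; i++) {
            if (s[i].p_offset <= e_phoff &&
                e_phoff < s[i].p_offset + s[i].p_filesz)
                return e_phoff - s[i].p_offset + s[i].p_vaddr + load_bias;
        }
        return 0;
    }

    int main(void)
    {
        struct seg segs[] = { { 0, 0x1000, 0x400000 } };

        /* e_phoff = 0x40 falls inside the only segment */
        printf("AT_PHDR = %#llx\n",
               (unsigned long long)phdr_addr(segs, 1, 0x40, 0));
        return 0;
    }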
+ */ + if (elf_ppnt->p_offset <= elf_ex->e_phoff && + elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) { + phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset + + elf_ppnt->p_vaddr; + } + k = elf_ppnt->p_vaddr; if ((elf_ppnt->p_flags & PF_X) && k < start_code) start_code = k; @@ -1215,6 +1226,7 @@ static int load_elf_binary(struct linux_binprm *bprm) } e_entry = elf_ex->e_entry + load_bias; + phdr_addr += load_bias; elf_bss += load_bias; elf_brk += load_bias; start_code += load_bias; @@ -1278,8 +1290,8 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ - retval = create_elf_tables(bprm, elf_ex, - load_addr, interp_load_addr, e_entry); + retval = create_elf_tables(bprm, elf_ex, interp_load_addr, + e_entry, phdr_addr); if (retval < 0) goto out; @@ -1630,17 +1642,16 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, * long file_ofs * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... */ -static int fill_files_note(struct memelfnote *note) +static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; unsigned count, size, names_ofs, remaining, n; user_long_t *data; user_long_t *start_end_ofs; char *name_base, *name_curpos; + int i; /* *Estimated* file count and total data size needed */ - count = mm->map_count; + count = cprm->vma_count; if (count > UINT_MAX / 64) return -EINVAL; size = count * 64; @@ -1662,11 +1673,12 @@ static int fill_files_note(struct memelfnote *note) name_base = name_curpos = ((char *)data) + names_ofs; remaining = size - names_ofs; count = 0; - for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *m = &cprm->vma_meta[i]; struct file *file; const char *filename; - file = vma->vm_file; + file = m->file; if (!file) continue; filename = file_path(file, name_curpos, remaining); @@ -1686,9 +1698,9 @@ static int fill_files_note(struct memelfnote *note) memmove(name_curpos, filename, n); name_curpos += n; - *start_end_ofs++ = vma->vm_start; - *start_end_ofs++ = vma->vm_end; - *start_end_ofs++ = vma->vm_pgoff; + *start_end_ofs++ = m->start; + *start_end_ofs++ = m->end; + *start_end_ofs++ = m->pgoff; count++; } @@ -1699,7 +1711,7 @@ static int fill_files_note(struct memelfnote *note) * Count usually is less than mm->map_count, * we need to move filenames down. */ - n = mm->map_count - count; + n = cprm->vma_count - count; if (n != 0) { unsigned shift_bytes = n * 3 * sizeof(data[0]); memmove(name_base - shift_bytes, name_base, @@ -1811,7 +1823,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - const kernel_siginfo_t *siginfo, struct pt_regs *regs) + struct coredump_params *cprm) { struct task_struct *dump_task = current; const struct user_regset_view *view = task_user_regset_view(dump_task); @@ -1883,7 +1895,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, * Now fill in each thread's information. 
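The binfmt_elf hunk above derives AT_PHDR from whichever PT_LOAD segment actually maps the program header table, instead of assuming load_addr + e_phoff. A rough userspace illustration of the same containment arithmetic; the helper name and the 64-bit types are assumptions, not kernel code:

    #include <elf.h>
    #include <stddef.h>

    /* Hypothetical helper: find the in-memory address of the program
     * header table given its file offset and the load segments. A
     * segment covers the table iff
     * p_offset <= e_phoff < p_offset + p_filesz. */
    static unsigned long phdr_vaddr(const Elf64_Ehdr *ehdr,
                                    const Elf64_Phdr *phdrs,
                                    unsigned long load_bias)
    {
            size_t i;

            for (i = 0; i < ehdr->e_phnum; i++) {
                    const Elf64_Phdr *p = &phdrs[i];

                    if (p->p_type != PT_LOAD)
                            continue;
                    if (p->p_offset <= ehdr->e_phoff &&
                        ehdr->e_phoff < p->p_offset + p->p_filesz)
                            return ehdr->e_phoff - p->p_offset +
                                   p->p_vaddr + load_bias;
            }
            return 0; /* table not covered by any PT_LOAD segment */
    }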
*/ for (t = info->thread; t != NULL; t = t->next) - if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size)) + if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, &info->size)) return 0; /* @@ -1892,13 +1904,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm); info->size += notesize(&info->psinfo); - fill_siginfo_note(&info->signote, &info->csigdata, siginfo); + fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo); info->size += notesize(&info->signote); fill_auxv_note(&info->auxv, current->mm); info->size += notesize(&info->auxv); - if (fill_files_note(&info->files) == 0) + if (fill_files_note(&info->files, cprm) == 0) info->size += notesize(&info->files); return 1; @@ -2040,7 +2052,7 @@ static int elf_note_info_init(struct elf_note_info *info) static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - const kernel_siginfo_t *siginfo, struct pt_regs *regs) + struct coredump_params *cprm) { struct core_thread *ct; struct elf_thread_status *ets; @@ -2061,13 +2073,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, list_for_each_entry(ets, &info->thread_list, list) { int sz; - sz = elf_dump_thread_status(siginfo->si_signo, ets); + sz = elf_dump_thread_status(cprm->siginfo->si_signo, ets); info->thread_status_size += sz; } /* now collect the dump for the current */ memset(info->prstatus, 0, sizeof(*info->prstatus)); - fill_prstatus(&info->prstatus->common, current, siginfo->si_signo); - elf_core_copy_regs(&info->prstatus->pr_reg, regs); + fill_prstatus(&info->prstatus->common, current, cprm->siginfo->si_signo); + elf_core_copy_regs(&info->prstatus->pr_reg, cprm->regs); /* Set up header */ fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS); @@ -2083,18 +2095,18 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_note(info->notes + 1, "CORE", NT_PRPSINFO, sizeof(*info->psinfo), info->psinfo); - fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo); + fill_siginfo_note(info->notes + 2, &info->csigdata, cprm->siginfo); fill_auxv_note(info->notes + 3, current->mm); info->numnote = 4; - if (fill_files_note(info->notes + info->numnote) == 0) { + if (fill_files_note(info->notes + info->numnote, cprm) == 0) { info->notes_files = info->notes + info->numnote; info->numnote++; } /* Try to dump the FPU. */ - info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, - info->fpu); + info->prstatus->pr_fpvalid = + elf_core_copy_task_fpregs(current, cprm->regs, info->fpu); if (info->prstatus->pr_fpvalid) fill_note(info->notes + info->numnote++, "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu); @@ -2180,8 +2192,7 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, static int elf_core_dump(struct coredump_params *cprm) { int has_dumped = 0; - int vma_count, segs, i; - size_t vma_data_size; + int segs, i; struct elfhdr elf; loff_t offset = 0, dataoff; struct elf_note_info info = { }; @@ -2189,16 +2200,12 @@ static int elf_core_dump(struct coredump_params *cprm) struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; elf_addr_t e_shoff; - struct core_vma_metadata *vma_meta; - - if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size)) - return 0; /* * The number of segs is recorded in the ELF header as a 16-bit value. * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
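For background on that 16-bit constraint: ELF stores e_phnum in a 16-bit field, and counts of PN_XNUM (0xffff) or more spill into section header 0's sh_info. A hedged sketch of the clamp; this is the general idea, not the kernel's fill_extnum_info():

    #include <elf.h>

    /* Clamp a segment count into the 16-bit e_phnum field; the real
     * count goes into section header 0's sh_info when it overflows. */
    static Elf64_Half clamp_phnum(unsigned int segs, Elf64_Shdr *shdr0)
    {
            if (segs >= PN_XNUM) {
                    shdr0->sh_info = segs;  /* extended phdr count */
                    return PN_XNUM;         /* 0xffff marker in e_phnum */
            }
            return (Elf64_Half)segs;
    }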
*/ - segs = vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(); /* for notes section */ segs++; @@ -2212,7 +2219,7 @@ static int elf_core_dump(struct coredump_params *cprm) * Collect all the non-memory information about the process for the * notes. This also sets up the file header. */ - if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs)) + if (!fill_note_info(&elf, e_phnum, &info, cprm)) goto end_coredump; has_dumped = 1; @@ -2237,7 +2244,7 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - offset += vma_data_size; + offset += cprm->vma_data_size; offset += elf_core_extra_data_size(); e_shoff = offset; @@ -2257,8 +2264,8 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; /* Write program headers for segments dump */ - for (i = 0; i < vma_count; i++) { - struct core_vma_metadata *meta = vma_meta + i; + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *meta = cprm->vma_meta + i; struct elf_phdr phdr; phdr.p_type = PT_LOAD; @@ -2295,8 +2302,8 @@ static int elf_core_dump(struct coredump_params *cprm) /* Align to page */ dump_skip_to(cprm, dataoff); - for (i = 0; i < vma_count; i++) { - struct core_vma_metadata *meta = vma_meta + i; + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *meta = cprm->vma_meta + i; if (!dump_user_range(cprm, meta->start, meta->dump_size)) goto end_coredump; @@ -2313,7 +2320,6 @@ static int elf_core_dump(struct coredump_params *cprm) end_coredump: free_note_info(&info); kfree(shdr4extnum); - kvfree(vma_meta); kfree(phdr4note); return has_dumped; } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c6f588dc4a9db..1a25536b01201 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1465,7 +1465,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm, static int elf_fdpic_core_dump(struct coredump_params *cprm) { int has_dumped = 0; - int vma_count, segs; + int segs; int i; struct elfhdr *elf = NULL; loff_t offset = 0, dataoff; @@ -1480,8 +1480,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) elf_addr_t e_shoff; struct core_thread *ct; struct elf_thread_status *tmp; - struct core_vma_metadata *vma_meta = NULL; - size_t vma_data_size; /* alloc memory for large data structures: too large to be on stack */ elf = kmalloc(sizeof(*elf), GFP_KERNEL); @@ -1491,9 +1489,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) if (!psinfo) goto end_coredump; - if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size)) - goto end_coredump; - for (ct = current->signal->core_state->dumper.next; ct; ct = ct->next) { tmp = elf_dump_thread_status(cprm->siginfo->si_signo, @@ -1513,7 +1508,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) tmp->next = thread_list; thread_list = tmp; - segs = vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(); /* for notes section */ segs++; @@ -1558,7 +1553,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) /* Page-align dumped data */ dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - offset += vma_data_size; + offset += cprm->vma_data_size; offset += elf_core_extra_data_size(); e_shoff = offset; @@ -1578,8 +1573,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; /* write program headers for segments dump */ - for (i = 0; i < vma_count; i++) { - struct core_vma_metadata *meta = vma_meta + i; + for (i 
= 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *meta = cprm->vma_meta + i; struct elf_phdr phdr; size_t sz; @@ -1628,7 +1623,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) dump_skip_to(cprm, dataoff); - if (!elf_fdpic_dump_segments(cprm, vma_meta, vma_count)) + if (!elf_fdpic_dump_segments(cprm, cprm->vma_meta, cprm->vma_count)) goto end_coredump; if (!elf_core_write_extra_data(cprm)) @@ -1652,7 +1647,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) thread_list = thread_list->next; kfree(tmp); } - kvfree(vma_meta); kfree(phdr4note); kfree(elf); kfree(psinfo); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 8202ad6aa1317..a0aa6c7e23351 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1522,8 +1522,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) return; - if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) + sb_start_write(fs_info->sb); + + if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) { + sb_end_write(fs_info->sb); return; + } /* * Long running balances can keep us blocked here for eternity, so @@ -1531,6 +1535,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) */ if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) { btrfs_exclop_finish(fs_info); + sb_end_write(fs_info->sb); return; } @@ -1605,6 +1610,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_exclop_finish(fs_info); + sb_end_write(fs_info->sb); } void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 71e5b2e9a1ba8..6158b870a269d 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -808,7 +808,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, u64 em_len; u64 em_start; struct extent_map *em; - blk_status_t ret = BLK_STS_RESOURCE; + blk_status_t ret; int faili = 0; u8 *sums; @@ -821,14 +821,18 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize); read_unlock(&em_tree->lock); - if (!em) - return BLK_STS_IOERR; + if (!em) { + ret = BLK_STS_IOERR; + goto out; + } ASSERT(em->compress_type != BTRFS_COMPRESS_NONE); compressed_len = em->block_len; cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); - if (!cb) + if (!cb) { + ret = BLK_STS_RESOURCE; goto out; + } refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits); cb->errors = 0; @@ -851,8 +855,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE); cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); - if (!cb->compressed_pages) + if (!cb->compressed_pages) { + ret = BLK_STS_RESOURCE; goto fail1; + } for (pg_index = 0; pg_index < nr_pages; pg_index++) { cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS); @@ -938,7 +944,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio = NULL; } } - return 0; + return BLK_STS_OK; fail2: while (faili >= 0) { @@ -951,6 +957,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, kfree(cb); out: free_extent_map(em); + bio->bi_status = ret; + bio_endio(bio); return ret; finish_cb: if (comp_bio) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c 
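The block-group hunk above takes sb_start_write() for the whole reclaim pass and releases it on every exit path, including the early returns. A minimal sketch of that pairing discipline, with hypothetical stand-ins (prototypes only) for the btrfs calls:

    #include <stdbool.h>

    /* Hypothetical stand-ins for sb_start_write()/sb_end_write() and
     * the exclusive-op handshake used by the reclaim worker above. */
    void write_access_start(void);
    void write_access_end(void);
    bool exclusive_op_start(void);
    void exclusive_op_end(void);
    void reclaim(void);

    /* Every early exit after write_access_start() must reach
     * write_access_end(), just as each return in the hunk above is
     * preceded by sb_end_write(). */
    void reclaim_worker(void)
    {
            write_access_start();

            if (!exclusive_op_start())
                    goto out;

            reclaim();
            exclusive_op_end();
    out:
            write_access_end();
    }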
index 48590a3807621..117afcda5affb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -441,17 +441,31 @@ static int csum_one_extent_buffer(struct extent_buffer *eb) else ret = btrfs_check_leaf_full(eb); - if (ret < 0) { - btrfs_print_tree(eb, 0); + if (ret < 0) + goto error; + + /* + * Also check the generation, the eb reached here must be newer than + * last committed. Or something seriously wrong happened. + */ + if (unlikely(btrfs_header_generation(eb) <= fs_info->last_trans_committed)) { + ret = -EUCLEAN; btrfs_err(fs_info, - "block=%llu write time tree block corruption detected", - eb->start); - WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); - return ret; + "block=%llu bad generation, have %llu expect > %llu", + eb->start, btrfs_header_generation(eb), + fs_info->last_trans_committed); + goto error; } write_extent_buffer(eb, result, 0, fs_info->csum_size); return 0; + +error: + btrfs_print_tree(eb, 0); + btrfs_err(fs_info, "block=%llu write time tree block corruption detected", + eb->start); + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + return ret; } /* Checksum all dirty extent buffers in one bio_vec */ diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4c91060d103ae..99028984340aa 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2639,7 +2639,6 @@ int btrfs_repair_one_sector(struct inode *inode, const int icsum = bio_offset >> fs_info->sectorsize_bits; struct bio *repair_bio; struct btrfs_bio *repair_bbio; - blk_status_t status; btrfs_debug(fs_info, "repair read error: read error at %llu", start); @@ -2678,13 +2677,13 @@ int btrfs_repair_one_sector(struct inode *inode, "repair read error: submitting new read to mirror %d", failrec->this_mirror); - status = submit_bio_hook(inode, repair_bio, failrec->this_mirror, - failrec->bio_flags); - if (status) { - free_io_failure(failure_tree, tree, failrec); - bio_put(repair_bio); - } - return blk_status_to_errno(status); + /* + * At this point we have a bio, so any errors from submit_bio_hook() + * will be handled by the endio on the repair_bio, so we can't return an + * error here. + */ + submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->bio_flags); + return BLK_STS_OK; } static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len) @@ -4780,11 +4779,12 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc, return ret; } if (cache) { - /* Impiles write in zoned mode */ - btrfs_put_block_group(cache); - /* Mark the last eb in a block group */ + /* + * Implies write in zoned mode. Mark the last eb in a block group. 
+ */ if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity) set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags); + btrfs_put_block_group(cache); } ret = write_one_eb(eb, wbc, epd); free_extent_buffer(eb); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 90c5c38836ab3..77c8f298f52e2 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -305,7 +305,7 @@ static int search_csum_tree(struct btrfs_fs_info *fs_info, read_extent_buffer(path->nodes[0], dst, (unsigned long)item, ret * csum_size); out: - if (ret == -ENOENT) + if (ret == -ENOENT || ret == -EFBIG) ret = 0; return ret; } @@ -368,6 +368,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_bio *bbio = NULL; struct btrfs_path *path; const u32 sectorsize = fs_info->sectorsize; const u32 csum_size = fs_info->csum_size; @@ -377,6 +378,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst u8 *csum; const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits; int count = 0; + blk_status_t ret = BLK_STS_OK; if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) || test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) @@ -400,7 +402,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst return BLK_STS_RESOURCE; if (!dst) { - struct btrfs_bio *bbio = btrfs_bio(bio); + bbio = btrfs_bio(bio); if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS); @@ -456,21 +458,27 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst count = search_csum_tree(fs_info, path, cur_disk_bytenr, search_len, csum_dst); - if (count <= 0) { - /* - * Either we hit a critical error or we didn't find - * the csum. - * Either way, we put zero into the csums dst, and skip - * to the next sector. - */ + if (count < 0) { + ret = errno_to_blk_status(count); + if (bbio) + btrfs_bio_free_csum(bbio); + break; + } + + /* + * We didn't find a csum for this range. We need to make sure + * we complain loudly about this, because we are not NODATASUM. + * + * However for the DATA_RELOC inode we could potentially be + * relocating data extents for a NODATASUM inode, so the inode + * itself won't be marked with NODATASUM, but the extent we're + * copying is in fact NODATASUM. If we don't find a csum we + * assume this is the case. + */ + if (count == 0) { memset(csum_dst, 0, csum_size); count = 1; - /* - * For data reloc inode, we need to mark the range - * NODATASUM so that balance won't report false csum - * error. - */ if (BTRFS_I(inode)->root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) { u64 file_offset; @@ -491,7 +499,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst } btrfs_free_path(path); - return BLK_STS_OK; + return ret; } int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5bbea5ec31fc5..0f4408f9daddc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2538,10 +2538,15 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio, goto out; if (bio_flags & EXTENT_BIO_COMPRESSED) { + /* + * btrfs_submit_compressed_read will handle completing + * the bio if there were any errors, so just return + * here. 
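The comment above describes an ownership convention: once the submit helper fails, it completes the bio itself, so the caller must not end it again. A small self-contained sketch of that callee-completes-on-error shape; the struct and names are mine, not btrfs API:

    struct request {
            int status;
            void (*complete)(struct request *);
    };

    /* If the lower layer fails, finish the request here; the caller
     * only propagates the error code and must not complete it again. */
    static int submit(struct request *rq, int (*lower)(struct request *))
    {
            int err = lower(rq);

            if (err) {
                    rq->status = err;
                    rq->complete(rq);   /* ownership ends here */
            }
            return err;
    }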
+ */ ret = btrfs_submit_compressed_read(inode, bio, mirror_num, bio_flags); - goto out; + goto out_no_endio; } else { /* * Lookup bio sums does extra checks around whether we @@ -2575,6 +2580,7 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio, bio->bi_status = ret; bio_endio(bio); } +out_no_endio: return ret; } diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index a3930da4eb3fb..e437238cc603e 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -505,8 +505,11 @@ static int btrfs_clone(struct inode *src, struct inode *inode, */ ASSERT(key.offset == 0); ASSERT(datal <= fs_info->sectorsize); - if (key.offset != 0 || datal > fs_info->sectorsize) - return -EUCLEAN; + if (WARN_ON(key.offset != 0) || + WARN_ON(datal > fs_info->sectorsize)) { + ret = -EUCLEAN; + goto out; + } ret = clone_copy_inline_extent(inode, path, &new_key, drop_start, datal, size, diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 294242c194d80..62382ae1eb02a 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -1061,7 +1061,6 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) trans_rsv->reserved; if (block_rsv_size < space_info->bytes_may_use) delalloc_size = space_info->bytes_may_use - block_rsv_size; - spin_unlock(&space_info->lock); /* * We don't want to include the global_rsv in our calculation, @@ -1092,6 +1091,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) flush = FLUSH_DELAYED_REFS_NR; } + spin_unlock(&space_info->lock); + /* * We don't want to reclaim everything, just a portion, so scale * down the to_reclaim by 1/4. If it takes us down to 0, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b07d382d53a86..f5b11c1a000aa 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -534,15 +534,48 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, return ret; } -static bool device_path_matched(const char *path, struct btrfs_device *device) +/* + * Check if the device at the given path matches the device in the given + * btrfs_device. + * + * Returns: + * true If it is the same device. + * false If it is not the same device or on error. + */ +static bool device_matched(const struct btrfs_device *device, const char *path) { - int found; + char *device_name; + dev_t dev_old; + dev_t dev_new; + int ret; + + /* + * If we are looking for a device with the matching dev_t, then skip + * a device without a name (a missing device).
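device_matched() above compares the dev_t each path resolves to instead of comparing the path strings, so two spellings of the same node still match. A rough userspace analogue using stat(2); the function name is mine:

    #include <stdbool.h>
    #include <sys/stat.h>

    /* Compare two device paths by the device number they resolve to,
     * not by their spelling ("/dev/sda1" vs. a symlink to it). */
    static bool same_block_device(const char *path_a, const char *path_b)
    {
            struct stat a, b;

            if (stat(path_a, &a) || stat(path_b, &b))
                    return false;   /* on error, treat as no match */
            if (!S_ISBLK(a.st_mode) || !S_ISBLK(b.st_mode))
                    return false;
            return a.st_rdev == b.st_rdev;
    }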
+ */ + if (!device->name) + return false; + + device_name = kzalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL); + if (!device_name) + return false; rcu_read_lock(); - found = strcmp(rcu_str_deref(device->name), path); + scnprintf(device_name, BTRFS_PATH_NAME_MAX, "%s", rcu_str_deref(device->name)); rcu_read_unlock(); - return found == 0; + ret = lookup_bdev(device_name, &dev_old); + kfree(device_name); + if (ret) + return false; + + ret = lookup_bdev(path, &dev_new); + if (ret) + return false; + + if (dev_old == dev_new) + return true; + + return false; } /* @@ -575,9 +608,7 @@ static int btrfs_free_stale_devices(const char *path, &fs_devices->devices, dev_list) { if (skip_device && skip_device == device) continue; - if (path && !device->name) - continue; - if (path && !device_path_matched(path, device)) + if (path && !device_matched(device, path)) continue; if (fs_devices->opened) { /* for an already deleted device return 0 */ @@ -8299,10 +8330,12 @@ static int relocating_repair_kthread(void *data) target = cache->start; btrfs_put_block_group(cache); + sb_start_write(fs_info->sb); if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) { btrfs_info(fs_info, "zoned: skip relocating block group %llu to repair: EBUSY", target); + sb_end_write(fs_info->sb); return -EBUSY; } @@ -8330,6 +8363,7 @@ static int relocating_repair_kthread(void *data) btrfs_put_block_group(cache); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_exclop_finish(fs_info); + sb_end_write(fs_info->sb); return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index 8e112b6bd3719..c76a8ef60a758 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1235,16 +1235,18 @@ static void bh_lru_install(struct buffer_head *bh) int i; check_irqs_on(); + bh_lru_lock(); + /* * the refcount of buffer_head in bh_lru prevents dropping the * attached page(i.e., try_to_free_buffers) so it could cause * failing page migration. * Skip putting upcoming bh into bh_lru until migration is done. 
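The buffer.c fix that follows moves the lru_cache_disabled() test inside bh_lru_lock(), so the check and the install are serialized against a concurrent disabler. The same check-under-lock shape in a small pthread sketch; all names here are assumptions:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool lru_disabled;

    /* Take the lock first, then test the disable flag, so a disabler
     * holding the lock cannot slip between our check and our update. */
    static void lru_install(void)
    {
            pthread_mutex_lock(&lru_lock);
            if (lru_disabled) {
                    pthread_mutex_unlock(&lru_lock);
                    return;
            }
            /* ... install into the LRU here ... */
            pthread_mutex_unlock(&lru_lock);
    }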
*/ - if (lru_cache_disabled()) + if (lru_cache_disabled()) { + bh_lru_unlock(); return; - - bh_lru_lock(); + } b = this_cpu_ptr(&bh_lrus); for (i = 0; i < BH_LRU_SIZE; i++) { diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c index cdce1609c5c26..180c234c2f46c 100644 --- a/fs/cifs/cifs_swn.c +++ b/fs/cifs/cifs_swn.c @@ -396,11 +396,11 @@ static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const ch switch (state) { case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE: cifs_dbg(FYI, "%s: resource name '%s' became unavailable\n", __func__, name); - cifs_mark_tcp_ses_conns_for_reconnect(swnreg->tcon->ses->server, true); + cifs_signal_cifsd_for_reconnect(swnreg->tcon->ses->server, true); break; case CIFS_SWN_RESOURCE_STATE_AVAILABLE: cifs_dbg(FYI, "%s: resource name '%s' became available\n", __func__, name); - cifs_mark_tcp_ses_conns_for_reconnect(swnreg->tcon->ses->server, true); + cifs_signal_cifsd_for_reconnect(swnreg->tcon->ses->server, true); break; case CIFS_SWN_RESOURCE_STATE_UNKNOWN: cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name); @@ -498,7 +498,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a goto unlock; } - cifs_mark_tcp_ses_conns_for_reconnect(tcon->ses->server, false); + cifs_signal_cifsd_for_reconnect(tcon->ses->server, false); unlock: mutex_unlock(&tcon->ses->server->srv_mutex); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 082c214786867..6e5246122ee2c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -210,6 +210,9 @@ cifs_read_super(struct super_block *sb) if (rc) goto out_no_root; /* tune readahead according to rsize if readahead size not set on mount */ + if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + tcon->ses->server->ops->negotiate_rsize(tcon, cifs_sb->ctx); if (cifs_sb->ctx->rasize) sb->s_bdi->ra_pages = cifs_sb->ctx->rasize / PAGE_SIZE; else @@ -254,6 +257,9 @@ static void cifs_kill_sb(struct super_block *sb) struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_tcon *tcon; struct cached_fid *cfid; + struct rb_root *root = &cifs_sb->tlink_tree; + struct rb_node *node; + struct tcon_link *tlink; /* * We need to release all dentries for the cached directories @@ -263,16 +269,18 @@ static void cifs_kill_sb(struct super_block *sb) dput(cifs_sb->root); cifs_sb->root = NULL; } - tcon = cifs_sb_master_tcon(cifs_sb); - if (tcon) { + node = rb_first(root); + while (node != NULL) { + tlink = rb_entry(node, struct tcon_link, tl_rbnode); + tcon = tlink_tcon(tlink); cfid = &tcon->crfid; mutex_lock(&cfid->fid_mutex); if (cfid->dentry) { - dput(cfid->dentry); cfid->dentry = NULL; } mutex_unlock(&cfid->fid_mutex); + node = rb_next(node); } kill_anon_super(sb); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index d3701295402d2..0df3b24a0bf4c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -132,6 +132,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *out_buf, int *bytes_returned); void +cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, + bool all_channels); +void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, bool mark_smb_session); extern int cifs_reconnect(struct TCP_Server_Info *server, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d3020abfe404a..d6f8ccc7bfe2f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -162,11 +162,51 @@ static void cifs_resolve_server(struct work_struct *work) mutex_unlock(&server->srv_mutex); } +/* + * Update the tcpStatus for the server.
+ * This is used to signal the cifsd thread to call cifs_reconnect. + * ONLY the cifsd thread should call cifs_reconnect; any other + * thread should use this function instead. + * + * @server: the tcp ses for which reconnect is needed + * @all_channels: if this needs to be done for all channels + */ +void +cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, + bool all_channels) +{ + struct TCP_Server_Info *pserver; + struct cifs_ses *ses; + int i; + + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + + spin_lock(&cifs_tcp_ses_lock); + if (!all_channels) { + pserver->tcpStatus = CifsNeedReconnect; + spin_unlock(&cifs_tcp_ses_lock); + return; + } + + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + spin_lock(&ses->chan_lock); + for (i = 0; i < ses->chan_count; i++) + ses->chans[i].server->tcpStatus = CifsNeedReconnect; + spin_unlock(&ses->chan_lock); + } + spin_unlock(&cifs_tcp_ses_lock); +} /* * Mark all sessions and tcons for reconnect. + * IMPORTANT: make sure that this gets called only from + * the cifsd thread. Any other thread should use + * cifs_signal_cifsd_for_reconnect instead * + * @server: the tcp ses for which reconnect is needed * @server needs to be previously set to CifsNeedReconnect. - * + * @mark_smb_session: whether even sessions need to be marked */ void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, @@ -3473,6 +3513,9 @@ static int connect_dfs_target(struct mount_ctx *mnt_ctx, const char *full_path, struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; char *oldmnt = cifs_sb->ctx->mount_options; + cifs_dbg(FYI, "%s: full_path=%s ref_path=%s target=%s\n", __func__, full_path, ref_path, + dfs_cache_get_tgt_name(tit)); + rc = dfs_cache_get_tgt_referral(ref_path, tit, &ref); if (rc) goto out; @@ -3571,13 +3614,18 @@ static int __follow_dfs_link(struct mount_ctx *mnt_ctx) if (rc) goto out; - /* Try all dfs link targets */ + /* Try all dfs link targets. If an I/O fails from the currently connected DFS target with an + * error other than STATUS_PATH_NOT_COVERED (-EREMOTE), then retry it from other targets as + * specified in MS-DFSC "3.1.5.2 I/O Operation to Target Fails with an Error Other Than + * STATUS_PATH_NOT_COVERED." + */ for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(&tl); tit; tit = dfs_cache_get_next_tgt(&tl, tit)) { rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->leaf_fullpath + 1, tit); if (!rc) { rc = is_path_remote(mnt_ctx); - break; + if (!rc || rc == -EREMOTE) + break; } } @@ -3651,7 +3699,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) goto error; rc = is_path_remote(&mnt_ctx); - if (rc == -EREMOTE) + if (rc) rc = follow_dfs_link(&mnt_ctx); if (rc) goto error; diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 831f42458bf6d..30e040da4f096 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1355,7 +1355,7 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach } cifs_dbg(FYI, "%s: no cached or matched targets.
mark dfs share for reconnect.\n", __func__); - cifs_mark_tcp_ses_conns_for_reconnect(tcon->ses->server, true); + cifs_signal_cifsd_for_reconnect(tcon->ses->server, true); } /* Refresh dfs referral of tcon and mark it for reconnect if needed */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e7af802dcfa60..a2723f7cb5e9d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3740,6 +3740,11 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, break; } + if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), + cifs_sb->ctx); + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, credits); if (rc) @@ -4474,6 +4479,11 @@ static void cifs_readahead(struct readahead_control *ractl) } } + if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), + cifs_sb->ctx); + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, credits); if (rc) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index b2fb7bd119366..c71c9a44bef4b 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -228,7 +228,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) spin_unlock(&GlobalMid_Lock); if (reconnect) { - cifs_mark_tcp_ses_conns_for_reconnect(server, false); + cifs_signal_cifsd_for_reconnect(server, false); } return mid; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index af5d0830bc8a8..5d120cd8bc78f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -25,6 +25,7 @@ #include "smb2glob.h" #include "cifs_ioctl.h" #include "smbdirect.h" +#include "fscache.h" #include "fs_context.h" /* Change credits for different ops and return the total number of credits */ @@ -1642,6 +1643,7 @@ smb2_ioctl_query_info(const unsigned int xid, unsigned int size[2]; void *data[2]; int create_options = is_dir ? 
CREATE_NOT_FILE : CREATE_NOT_DIR; + void (*free_req1_func)(struct smb_rqst *r); vars = kzalloc(sizeof(*vars), GFP_ATOMIC); if (vars == NULL) @@ -1651,27 +1653,29 @@ smb2_ioctl_query_info(const unsigned int xid, resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER; - if (copy_from_user(&qi, arg, sizeof(struct smb_query_info))) - goto e_fault; - + if (copy_from_user(&qi, arg, sizeof(struct smb_query_info))) { + rc = -EFAULT; + goto free_vars; + } if (qi.output_buffer_length > 1024) { - kfree(vars); - return -EINVAL; + rc = -EINVAL; + goto free_vars; } if (!ses || !server) { - kfree(vars); - return -EIO; + rc = -EIO; + goto free_vars; } if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - buffer = memdup_user(arg + sizeof(struct smb_query_info), - qi.output_buffer_length); - if (IS_ERR(buffer)) { - kfree(vars); - return PTR_ERR(buffer); + if (qi.output_buffer_length) { + buffer = memdup_user(arg + sizeof(struct smb_query_info), qi.output_buffer_length); + if (IS_ERR(buffer)) { + rc = PTR_ERR(buffer); + goto free_vars; + } } /* Open */ @@ -1709,45 +1713,45 @@ smb2_ioctl_query_info(const unsigned int xid, rc = SMB2_open_init(tcon, server, &rqst[0], &oplock, &oparms, path); if (rc) - goto iqinf_exit; + goto free_output_buffer; smb2_set_next_command(tcon, &rqst[0]); /* Query */ if (qi.flags & PASSTHRU_FSCTL) { /* Can eventually relax perm check since server enforces too */ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) { rc = -EPERM; - else { - rqst[1].rq_iov = &vars->io_iov[0]; - rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE; - - rc = SMB2_ioctl_init(tcon, server, - &rqst[1], - COMPOUND_FID, COMPOUND_FID, - qi.info_type, true, buffer, - qi.output_buffer_length, - CIFSMaxBufSize - - MAX_SMB2_CREATE_RESPONSE_SIZE - - MAX_SMB2_CLOSE_RESPONSE_SIZE); + goto free_open_req; } + rqst[1].rq_iov = &vars->io_iov[0]; + rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE; + + rc = SMB2_ioctl_init(tcon, server, &rqst[1], COMPOUND_FID, COMPOUND_FID, + qi.info_type, true, buffer, qi.output_buffer_length, + CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE); + free_req1_func = SMB2_ioctl_free; } else if (qi.flags == PASSTHRU_SET_INFO) { /* Can eventually relax perm check since server enforces too */ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) { rc = -EPERM; - else { - rqst[1].rq_iov = &vars->si_iov[0]; - rqst[1].rq_nvec = 1; - - size[0] = 8; - data[0] = buffer; - - rc = SMB2_set_info_init(tcon, server, - &rqst[1], - COMPOUND_FID, COMPOUND_FID, - current->tgid, - FILE_END_OF_FILE_INFORMATION, - SMB2_O_INFO_FILE, 0, data, size); + goto free_open_req; + } + if (qi.output_buffer_length < 8) { + rc = -EINVAL; + goto free_open_req; } + rqst[1].rq_iov = &vars->si_iov[0]; + rqst[1].rq_nvec = 1; + + /* MS-FSCC 2.4.13 FileEndOfFileInformation */ + size[0] = 8; + data[0] = buffer; + + rc = SMB2_set_info_init(tcon, server, &rqst[1], COMPOUND_FID, COMPOUND_FID, + current->tgid, FILE_END_OF_FILE_INFORMATION, + SMB2_O_INFO_FILE, 0, data, size); + free_req1_func = SMB2_set_info_free; } else if (qi.flags == PASSTHRU_QUERY_INFO) { rqst[1].rq_iov = &vars->qi_iov[0]; rqst[1].rq_nvec = 1; @@ -1758,6 +1762,7 @@ smb2_ioctl_query_info(const unsigned int xid, qi.info_type, qi.additional_information, qi.input_buffer_length, qi.output_buffer_length, buffer); + free_req1_func = SMB2_query_info_free; } else { /* unknown flags */ cifs_tcon_dbg(VFS, "Invalid passthru query flags: 0x%x\n", qi.flags); @@ -1765,7 +1770,7 @@ smb2_ioctl_query_info(const unsigned int xid, } if 
(rc) - goto iqinf_exit; + goto free_open_req; smb2_set_next_command(tcon, &rqst[1]); smb2_set_related(&rqst[1]); @@ -1776,14 +1781,14 @@ smb2_ioctl_query_info(const unsigned int xid, rc = SMB2_close_init(tcon, server, &rqst[2], COMPOUND_FID, COMPOUND_FID, false); if (rc) - goto iqinf_exit; + goto free_req_1; smb2_set_related(&rqst[2]); rc = compound_send_recv(xid, ses, server, flags, 3, rqst, resp_buftype, rsp_iov); if (rc) - goto iqinf_exit; + goto out; /* No need to bump num_remote_opens since handle immediately closed */ if (qi.flags & PASSTHRU_FSCTL) { @@ -1793,18 +1798,22 @@ smb2_ioctl_query_info(const unsigned int xid, qi.input_buffer_length = le32_to_cpu(io_rsp->OutputCount); if (qi.input_buffer_length > 0 && le32_to_cpu(io_rsp->OutputOffset) + qi.input_buffer_length - > rsp_iov[1].iov_len) - goto e_fault; + > rsp_iov[1].iov_len) { + rc = -EFAULT; + goto out; + } if (copy_to_user(&pqi->input_buffer_length, &qi.input_buffer_length, - sizeof(qi.input_buffer_length))) - goto e_fault; + sizeof(qi.input_buffer_length))) { + rc = -EFAULT; + goto out; + } if (copy_to_user((void __user *)pqi + sizeof(struct smb_query_info), (const void *)io_rsp + le32_to_cpu(io_rsp->OutputOffset), qi.input_buffer_length)) - goto e_fault; + rc = -EFAULT; } else { pqi = (struct smb_query_info __user *)arg; qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; @@ -1812,28 +1821,30 @@ smb2_ioctl_query_info(const unsigned int xid, qi.input_buffer_length = le32_to_cpu(qi_rsp->OutputBufferLength); if (copy_to_user(&pqi->input_buffer_length, &qi.input_buffer_length, - sizeof(qi.input_buffer_length))) - goto e_fault; + sizeof(qi.input_buffer_length))) { + rc = -EFAULT; + goto out; + } if (copy_to_user(pqi + 1, qi_rsp->Buffer, qi.input_buffer_length)) - goto e_fault; + rc = -EFAULT; } - iqinf_exit: - cifs_small_buf_release(rqst[0].rq_iov[0].iov_base); - cifs_small_buf_release(rqst[1].rq_iov[0].iov_base); - cifs_small_buf_release(rqst[2].rq_iov[0].iov_base); +out: free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base); - kfree(vars); + SMB2_close_free(&rqst[2]); +free_req_1: + free_req1_func(&rqst[1]); +free_open_req: + SMB2_open_free(&rqst[0]); +free_output_buffer: kfree(buffer); +free_vars: + kfree(vars); return rc; - -e_fault: - rc = -EFAULT; - goto iqinf_exit; } static ssize_t @@ -3887,29 +3898,38 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, { int rc; unsigned int xid; + struct inode *inode; struct cifsFileInfo *cfile = file->private_data; + struct cifsInodeInfo *cifsi; __le64 eof; xid = get_xid(); - if (off >= i_size_read(file->f_inode) || - off + len >= i_size_read(file->f_inode)) { + inode = d_inode(cfile->dentry); + cifsi = CIFS_I(inode); + + if (off >= i_size_read(inode) || + off + len >= i_size_read(inode)) { rc = -EINVAL; goto out; } rc = smb2_copychunk_range(xid, cfile, cfile, off + len, - i_size_read(file->f_inode) - off - len, off); + i_size_read(inode) - off - len, off); if (rc < 0) goto out; - eof = cpu_to_le64(i_size_read(file->f_inode) - len); + eof = cpu_to_le64(i_size_read(inode) - len); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof); if (rc < 0) goto out; rc = 0; + + cifsi->server_eof = i_size_read(inode) - len; + truncate_setsize(inode, cifsi->server_eof); + fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof); out: free_xid(xid); return rc; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c 
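The smb2_ioctl_query_info() rewrite above replaces one catch-all iqinf_exit label with a layered unwind, where each label frees exactly what had been set up when the failure occurred and free_req1_func records which request-specific destructor applies. A compact userspace sketch of the layered-goto pattern; the function and its allocations are hypothetical:

    #include <errno.h>
    #include <stdlib.h>

    /* Each label releases only what was successfully allocated before
     * the failure point, mirroring the free_req_1 -> free_open_req ->
     * free_output_buffer -> free_vars chain above. */
    static int layered_setup(size_t n, int (*work)(void *, void *, void *))
    {
            int rc;
            void *a, *b, *c;

            a = malloc(n);
            if (!a)
                    return -ENOMEM;
            b = malloc(n);
            if (!b) {
                    rc = -ENOMEM;
                    goto free_a;
            }
            c = malloc(n);
            if (!c) {
                    rc = -ENOMEM;
                    goto free_b;
            }

            rc = work(a, b, c);
            free(c);        /* success path unwinds everything too */
    free_b:
            free(b);
    free_a:
            free(a);
            return rc;
    }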
index 7e7909b1ae118..f82d6fcb5c646 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3858,8 +3858,10 @@ void smb2_reconnect_server(struct work_struct *work) tcon = kzalloc(sizeof(struct cifs_tcon), GFP_KERNEL); if (!tcon) { resched = true; - list_del_init(&ses->rlist); - cifs_put_smb_ses(ses); + list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { + list_del_init(&ses->rlist); + cifs_put_smb_ses(ses); + } goto done; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index a4c3e027cca25..eeb1a699bd6f2 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -430,7 +430,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, * be taken as the remainder of this one. We need to kill the * socket so the server throws away the partial SMB */ - cifs_mark_tcp_ses_conns_for_reconnect(server, false); + cifs_signal_cifsd_for_reconnect(server, false); trace_smb3_partial_send_reconnect(server->CurrentMid, server->conn_id, server->hostname); } diff --git a/fs/coredump.c b/fs/coredump.c index 1c060c0a2d72f..7ed7d601e5e00 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -53,6 +54,9 @@ #include +static bool dump_vma_snapshot(struct coredump_params *cprm); +static void free_vma_snapshot(struct coredump_params *cprm); + static int core_uses_pid; static unsigned int core_pipe_limit; static char core_pattern[CORENAME_MAX_SIZE] = "core"; @@ -531,6 +535,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) * by any locks. */ .mm_flags = mm->flags, + .vma_meta = NULL, }; audit_core_dumps(siginfo->si_signo); @@ -745,6 +750,9 @@ void do_coredump(const kernel_siginfo_t *siginfo) pr_info("Core dump to |%s disabled\n", cn.corename); goto close_fail; } + if (!dump_vma_snapshot(&cprm)) + goto close_fail; + file_start_write(cprm.file); core_dumped = binfmt->core_dump(&cprm); /* @@ -758,6 +766,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) dump_emit(&cprm, "", 1); } file_end_write(cprm.file); + free_vma_snapshot(&cprm); } if (ispipe && core_pipe_limit) wait_for_dump_helpers(cprm.file); @@ -980,6 +989,8 @@ static bool always_dump_vma(struct vm_area_struct *vma) return false; } +#define DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER 1 + /* * Decide how much of @vma's contents should be included in a core dump. */ @@ -1039,9 +1050,20 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, * dump the first page to aid in determining what was mapped here. */ if (FILTER(ELF_HEADERS) && - vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ) && - (READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0) - return PAGE_SIZE; + vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) { + if ((READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0) + return PAGE_SIZE; + + /* + * ELF libraries aren't always executable. + * We'll want to check whether the mapping starts with the ELF + * magic, but not now - we're holding the mmap lock, + * so copy_from_user() doesn't work here. + * Use a placeholder instead, and fix it up later in + * dump_vma_snapshot(). 
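The coredump hunks below record DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER while the mmap lock is held, then re-resolve it with copy_from_user() once the lock is dropped. A self-contained sketch of that two-pass pattern, with read_user() standing in for copy_from_user(); all names here are assumptions:

    #include <string.h>

    struct vma_meta { unsigned long start; unsigned long dump_size; };

    /* Stand-in for copy_from_user(): here the "user" address is just
     * a pointer we can dereference directly. Returns 0 on success. */
    static int read_user(unsigned long addr, void *dst, size_t len)
    {
            memcpy(dst, (const void *)addr, len);
            return 0;
    }

    enum { DUMP_NONE = 0, DUMP_MAYBE_ELFHDR = 1, DUMP_PAGE = 4096 };

    /* Second pass, run after the lock is released: turn each
     * placeholder into a real size based on the ELF magic. */
    static void resolve_placeholders(struct vma_meta *m, size_t n)
    {
            size_t i;

            for (i = 0; i < n; i++) {
                    char magic[4];

                    if (m[i].dump_size != DUMP_MAYBE_ELFHDR)
                            continue;
                    if (read_user(m[i].start, magic, sizeof(magic)) == 0 &&
                        memcmp(magic, "\177ELF", 4) == 0)
                            m[i].dump_size = DUMP_PAGE;
                    else
                            m[i].dump_size = DUMP_NONE;
            }
    }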
+ */ + return DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER; + } #undef FILTER @@ -1078,18 +1100,29 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, return gate_vma; } +static void free_vma_snapshot(struct coredump_params *cprm) +{ + if (cprm->vma_meta) { + int i; + for (i = 0; i < cprm->vma_count; i++) { + struct file *file = cprm->vma_meta[i].file; + if (file) + fput(file); + } + kvfree(cprm->vma_meta); + cprm->vma_meta = NULL; + } +} + /* * Under the mmap_lock, take a snapshot of relevant information about the task's * VMAs. */ -int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, - struct core_vma_metadata **vma_meta, - size_t *vma_data_size_ptr) +static bool dump_vma_snapshot(struct coredump_params *cprm) { struct vm_area_struct *vma, *gate_vma; struct mm_struct *mm = current->mm; int i; - size_t vma_data_size = 0; /* * Once the stack expansion code is fixed to not change VMA bounds @@ -1097,36 +1130,51 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, * mmap_lock in read mode. */ if (mmap_write_lock_killable(mm)) - return -EINTR; + return false; + cprm->vma_data_size = 0; gate_vma = get_gate_vma(mm); - *vma_count = mm->map_count + (gate_vma ? 1 : 0); + cprm->vma_count = mm->map_count + (gate_vma ? 1 : 0); - *vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL); - if (!*vma_meta) { + cprm->vma_meta = kvmalloc_array(cprm->vma_count, sizeof(*cprm->vma_meta), GFP_KERNEL); + if (!cprm->vma_meta) { mmap_write_unlock(mm); - return -ENOMEM; + return false; } for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; vma = next_vma(vma, gate_vma), i++) { - struct core_vma_metadata *m = (*vma_meta) + i; + struct core_vma_metadata *m = cprm->vma_meta + i; m->start = vma->vm_start; m->end = vma->vm_end; m->flags = vma->vm_flags; m->dump_size = vma_dump_size(vma, cprm->mm_flags); + m->pgoff = vma->vm_pgoff; - vma_data_size += m->dump_size; + m->file = vma->vm_file; + if (m->file) + get_file(m->file); } mmap_write_unlock(mm); - if (WARN_ON(i != *vma_count)) { - kvfree(*vma_meta); - return -EFAULT; + for (i = 0; i < cprm->vma_count; i++) { + struct core_vma_metadata *m = cprm->vma_meta + i; + + if (m->dump_size == DUMP_SIZE_MAYBE_ELFHDR_PLACEHOLDER) { + char elfmag[SELFMAG]; + + if (copy_from_user(elfmag, (void __user *)m->start, SELFMAG) || + memcmp(elfmag, ELFMAG, SELFMAG) != 0) { + m->dump_size = 0; + } else { + m->dump_size = PAGE_SIZE; + } + } + + cprm->vma_data_size += m->dump_size; } - *vma_data_size_ptr = vma_data_size; - return 0; + return true; } diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index dac252bc92281..f3babf1e66083 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -221,9 +221,11 @@ void erofs_unregister_sysfs(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); - kobject_del(&sbi->s_kobj); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); + if (sbi->s_kobj.state_in_sysfs) { + kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + } } int __init erofs_init_sysfs(void) diff --git a/fs/exec.c b/fs/exec.c index 79f2c9483302d..6027e2a939a74 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -495,8 +495,14 @@ static int bprm_stack_limits(struct linux_binprm *bprm) * the stack. They aren't stored until much later when we can't * signal to the parent that the child has run out of stack space. * Instead, calculate it here so it's possible to fail gracefully. 
+ * + * In the case of argc = 0, make sure there is space for adding an + * empty string (which will bump argc to 1), to ensure confused + * userspace programs don't start processing from argv[1], thinking + * argc can never be 0, to keep them from walking envp by accident. + * See do_execveat_common(). */ - ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); + ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *); if (limit <= ptr_size) return -E2BIG; limit -= ptr_size; @@ -1006,6 +1012,7 @@ static int exec_mmap(struct mm_struct *mm) active_mm = tsk->active_mm; tsk->active_mm = mm; tsk->mm = mm; + lru_gen_add_mm(mm); /* * This prevents preemption while active_mm is being loaded and * it and mm are being updated, which could cause problems for @@ -1018,6 +1025,7 @@ static int exec_mmap(struct mm_struct *mm) activate_mm(active_mm, mm); if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); + lru_gen_use_mm(mm); tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); @@ -1897,6 +1905,9 @@ static int do_execveat_common(int fd, struct filename *filename, } retval = count(argv, MAX_ARG_STRINGS); + if (retval == 0) + pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n", + current->comm, bprm->filename); if (retval < 0) goto out_free; bprm->argc = retval; @@ -1923,6 +1934,19 @@ static int do_execveat_common(int fd, struct filename *filename, if (retval < 0) goto out_free; + /* + * When argv is empty, add an empty string ("") as argv[0] to + * ensure confused userspace programs that start processing + * from argv[1] won't end up walking envp. See also + * bprm_stack_limits(). + */ + if (bprm->argc == 0) { + retval = copy_string_kernel("", bprm); + if (retval < 0) + goto out_free; + bprm->argc = 1; + } + retval = bprm_execve(bprm, fd, filename, flags); out_free: free_bprm(bprm); @@ -1951,6 +1975,8 @@ int kernel_execve(const char *kernel_filename, } retval = count_strings_kernel(argv); + if (WARN_ON_ONCE(retval == 0)) + retval = -EINVAL; if (retval < 0) goto out_free; bprm->argc = retval; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 94f1fbd7d3ac2..6d4f5ef747660 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -753,8 +753,12 @@ static loff_t ext2_max_size(int bits) res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); res += 1LL << (3*(bits-2)); + /* Compute how many metadata blocks are needed */ + meta_blocks = 1; + meta_blocks += 1 + ppb; + meta_blocks += 1 + ppb + ppb * ppb; /* Does block tree limit file size?
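The ext2 fix above charges the indirect-tree metadata (meta_blocks) against the upper limit before deciding whether the block tree is the binding constraint. Worked through for 4 KiB blocks (bits = 12, so ppb = 1024 pointers per block), as a small standalone program:

    #include <stdio.h>

    int main(void)
    {
            int bits = 12;                      /* 4 KiB blocks */
            long long ppb = 1LL << (bits - 2);  /* pointers per block: 1024 */

            /* One singly, one doubly, one triply indirect tree, as above. */
            long long meta = 1;                 /* singly indirect block */
            meta += 1 + ppb;                    /* doubly: 1 + 1024 */
            meta += 1 + ppb + ppb * ppb;        /* triply: 1 + 1024 + 1024^2 */

            printf("meta_blocks = %lld\n", meta);  /* prints 1050627 */
            return 0;
    }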
*/ - if (res < upper_limit) + if (res + meta_blocks <= upper_limit) goto check_lfs; res = upper_limit; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index e429418036050..9c076262770d9 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1783,19 +1783,20 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) void *inline_pos; unsigned int offset; struct ext4_dir_entry_2 *de; - bool ret = true; + bool ret = false; err = ext4_get_inode_loc(dir, &iloc); if (err) { EXT4_ERROR_INODE_ERR(dir, -err, "error %d getting inode %lu block", err, dir->i_ino); - return true; + return false; } down_read(&EXT4_I(dir)->xattr_sem); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; + ret = true; goto out; } @@ -1804,7 +1805,6 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - no `..'", dir->i_ino); - ret = true; goto out; } @@ -1823,16 +1823,15 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) dir->i_ino, le32_to_cpu(de->inode), le16_to_cpu(de->rec_len), de->name_len, inline_size); - ret = true; goto out; } if (le32_to_cpu(de->inode)) { - ret = false; goto out; } offset += ext4_rec_len_from_disk(de->rec_len, inline_size); } + ret = true; out: up_read(&EXT4_I(dir)->xattr_sem); brelse(iloc.bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 01c9e4f743ba9..531a94f48637c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1993,6 +1993,15 @@ static int ext4_writepage(struct page *page, else len = PAGE_SIZE; + /* Should never happen but for bugs in other kernel subsystems */ + if (!page_has_buffers(page)) { + ext4_warning_inode(inode, + "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + return 0; + } + page_bufs = page_buffers(page); /* * We cannot do block allocation or other extent handling in this @@ -2594,6 +2603,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) wait_on_page_writeback(page); BUG_ON(PageWriteback(page)); + /* + * Should never happen but for buggy code in + * other subsystems that call + * set_page_dirty() without properly warning + * the file system first. See [1] for more + * information. 
+ * + * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz + */ + if (!page_has_buffers(page)) { + ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + continue; + } + if (mpd->map.m_len == 0) mpd->first_page = page->index; mpd->next_page = page->index + 1; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 67ac95c4cd9b8..1f37eb0176ccc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1000,7 +1000,7 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac) return 0; if (ac->ac_criteria >= 2) return 0; - if (ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) + if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) return 0; return 1; } @@ -3899,69 +3899,95 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ext4_grpblk_t blkoff; - int i, clen, err; + int i, err; int already; + unsigned int clen, clen_changed, thisgrp_len; - clen = EXT4_B2C(sbi, len); + while (len > 0) { + ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - ext4_get_group_no_and_offset(sb, block, &group, &blkoff); - bitmap_bh = ext4_read_block_bitmap(sb, group); - if (IS_ERR(bitmap_bh)) { - err = PTR_ERR(bitmap_bh); - bitmap_bh = NULL; - goto out_err; - } + /* + * Check to see if we are freeing blocks across a group + * boundary. + * In case of flex_bg, it can happen that (block, len) spans + * more than one group. In that case we need to get the + * corresponding group metadata to work with. + * For this we iterate over each group in the enclosing loop. + */ + thisgrp_len = min_t(unsigned int, (unsigned int)len, + EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff)); + clen = EXT4_NUM_B2C(sbi, thisgrp_len); - err = -EIO; - gdp = ext4_get_group_desc(sb, group, &gdp_bh); - if (!gdp) - goto out_err; + bitmap_bh = ext4_read_block_bitmap(sb, group); + if (IS_ERR(bitmap_bh)) { + err = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; + break; + } - ext4_lock_group(sb, group); - already = 0; - for (i = 0; i < clen; i++) - if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state) - already++; + err = -EIO; + gdp = ext4_get_group_desc(sb, group, &gdp_bh); + if (!gdp) + break; - if (state) - ext4_set_bits(bitmap_bh->b_data, blkoff, clen); - else - mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); - if (ext4_has_group_desc_csum(sb) && - (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - ext4_free_group_clusters_set(sb, gdp, - ext4_free_clusters_after_init(sb, - group, gdp)); - } - if (state) - clen = ext4_free_group_clusters(sb, gdp) - clen + already; - else - clen = ext4_free_group_clusters(sb, gdp) + clen - already; + ext4_lock_group(sb, group); + already = 0; + for (i = 0; i < clen; i++) + if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == + !state) + already++; + + clen_changed = clen - already; + if (state) + ext4_set_bits(bitmap_bh->b_data, blkoff, clen); + else + mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen); + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + ext4_free_group_clusters_set(sb, gdp, + ext4_free_clusters_after_init(sb, group, gdp)); + } + if (state) + clen = ext4_free_group_clusters(sb, gdp) - clen_changed; + else + clen = ext4_free_group_clusters(sb, gdp) + clen_changed; - ext4_free_group_clusters_set(sb, gdp, clen); -
ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); - ext4_group_desc_csum_set(sb, group, gdp); + ext4_free_group_clusters_set(sb, gdp, clen); + ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh); + ext4_group_desc_csum_set(sb, group, gdp); - ext4_unlock_group(sb, group); + ext4_unlock_group(sb, group); - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, group); + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, group); + struct flex_groups *fg = sbi_array_rcu_deref(sbi, + s_flex_groups, flex_group); - atomic64_sub(len, - &sbi_array_rcu_deref(sbi, s_flex_groups, - flex_group)->free_clusters); + if (state) + atomic64_sub(clen_changed, &fg->free_clusters); + else + atomic64_add(clen_changed, &fg->free_clusters); + + } + + err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); + if (err) + break; + sync_dirty_buffer(bitmap_bh); + err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); + sync_dirty_buffer(gdp_bh); + if (err) + break; + + block += thisgrp_len; + len -= thisgrp_len; + brelse(bitmap_bh); + BUG_ON(len < 0); } - err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh); if (err) - goto out_err; - sync_dirty_buffer(bitmap_bh); - err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh); - sync_dirty_buffer(gdp_bh); - -out_err: - brelse(bitmap_bh); + brelse(bitmap_bh); } /* diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 8cf0a924a49bf..39e223f7bf64d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2997,14 +2997,14 @@ bool ext4_empty_dir(struct inode *inode) if (inode->i_size < ext4_dir_rec_len(1, NULL) + ext4_dir_rec_len(2, NULL)) { EXT4_ERROR_INODE(inode, "invalid size"); - return true; + return false; } /* The first directory block must not be a hole, * so treat it as DIRENT_HTREE */ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) - return true; + return false; de = (struct ext4_dir_entry_2 *) bh->b_data; if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, @@ -3012,7 +3012,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) { ext4_warning_inode(inode, "directory missing '.'"); brelse(bh); - return true; + return false; } offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); de = ext4_next_entry(de, sb->s_blocksize); @@ -3021,7 +3021,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { ext4_warning_inode(inode, "directory missing '..'"); brelse(bh); - return true; + return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); while (offset < inode->i_size) { @@ -3035,7 +3035,7 @@ bool ext4_empty_dir(struct inode *inode) continue; } if (IS_ERR(bh)) - return true; + return false; } de = (struct ext4_dir_entry_2 *) (bh->b_data + (offset & (sb->s_blocksize - 1))); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c5021ca0a28ad..bed29f96ccc7e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2021,12 +2021,12 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg) #define EXT4_SPEC_s_commit_interval (1 << 16) #define EXT4_SPEC_s_fc_debug_max_replay (1 << 17) #define EXT4_SPEC_s_sb_block (1 << 18) +#define EXT4_SPEC_mb_optimize_scan (1 << 19) struct ext4_fs_context { char *s_qf_names[EXT4_MAXQUOTAS]; char *test_dummy_enc_arg; int s_jquota_fmt; /* Format of quota to use */ - int mb_optimize_scan; #ifdef CONFIG_EXT4_DEBUG int s_fc_debug_max_replay; #endif @@ -2045,8 +2045,8 @@ struct ext4_fs_context { unsigned int 
mask_s_mount_opt; unsigned int vals_s_mount_opt2; unsigned int mask_s_mount_opt2; - unsigned int vals_s_mount_flags; - unsigned int mask_s_mount_flags; + unsigned long vals_s_mount_flags; + unsigned long mask_s_mount_flags; unsigned int opt_flags; /* MOPT flags */ unsigned int spec; u32 s_max_batch_time; @@ -2149,23 +2149,36 @@ static inline void ctx_set_##name(struct ext4_fs_context *ctx, \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name |= flag; \ -} \ +} + +#define EXT4_CLEAR_CTX(name) \ static inline void ctx_clear_##name(struct ext4_fs_context *ctx, \ unsigned long flag) \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name &= ~flag; \ -} \ +} + +#define EXT4_TEST_CTX(name) \ static inline unsigned long \ ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \ { \ return (ctx->vals_s_##name & flag); \ -} \ +} -EXT4_SET_CTX(flags); +EXT4_SET_CTX(flags); /* set only */ EXT4_SET_CTX(mount_opt); +EXT4_CLEAR_CTX(mount_opt); +EXT4_TEST_CTX(mount_opt); EXT4_SET_CTX(mount_opt2); -EXT4_SET_CTX(mount_flags); +EXT4_CLEAR_CTX(mount_opt2); +EXT4_TEST_CTX(mount_opt2); + +static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit) +{ + set_bit(bit, &ctx->mask_s_mount_flags); + set_bit(bit, &ctx->vals_s_mount_flags); +} static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) { @@ -2235,7 +2248,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) param->key); return 0; case Opt_abort: - ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED); + ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); return 0; case Opt_i_version: ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20"); @@ -2451,12 +2464,17 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx_clear_mount_opt(ctx, m->mount_opt); return 0; case Opt_mb_optimize_scan: - if (result.int_32 != 0 && result.int_32 != 1) { + if (result.int_32 == 1) { + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); + ctx->spec |= EXT4_SPEC_mb_optimize_scan; + } else if (result.int_32 == 0) { + ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); + ctx->spec |= EXT4_SPEC_mb_optimize_scan; + } else { ext4_msg(NULL, KERN_WARNING, "mb_optimize_scan should be set to 0 or 1."); return -EINVAL; } - ctx->mb_optimize_scan = result.int_32; return 0; } @@ -4369,7 +4387,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) /* Set defaults for the variables that will be set during parsing */ ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; - ctx->mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_sectors_written_start = @@ -5320,12 +5337,12 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) * turned off by passing "mb_optimize_scan=0". This can also be * turned on forcefully by passing "mb_optimize_scan=1". 
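(The comment closes just below. These mb_optimize_scan hunks replace the old tri-state ctx->mb_optimize_scan with an explicit "the user set this option" bit, EXT4_SPEC_mb_optimize_scan, so the group-count heuristic only applies when no value was given at mount time. A compilable sketch of that pattern; the names here are invented stand-ins:)

    #include <stdio.h>

    #define SPEC_MB_OPTIMIZE_SCAN (1u << 19)	/* stand-in for EXT4_SPEC_mb_optimize_scan */

    struct parse_ctx {
    	unsigned int spec;		/* which options were set explicitly */
    	int mb_optimize_scan;		/* the explicit value, if any */
    };

    static int effective_mb_optimize_scan(const struct parse_ctx *ctx,
    				      unsigned int groups, unsigned int threshold)
    {
    	if (ctx->spec & SPEC_MB_OPTIMIZE_SCAN)	/* user said 0 or 1 */
    		return ctx->mb_optimize_scan;
    	return groups >= threshold;		/* heuristic default */
    }

    int main(void)
    {
    	struct parse_ctx unset = { 0, 0 };
    	struct parse_ctx forced_off = { SPEC_MB_OPTIMIZE_SCAN, 0 };

    	printf("%d %d\n",
    	       effective_mb_optimize_scan(&unset, 32, 16),		/* 1 */
    	       effective_mb_optimize_scan(&forced_off, 32, 16));	/* 0 */
    	return 0;
    }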
*/ - if (ctx->mb_optimize_scan == 1) - set_opt2(sb, MB_OPTIMIZE_SCAN); - else if (ctx->mb_optimize_scan == 0) - clear_opt2(sb, MB_OPTIMIZE_SCAN); - else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) - set_opt2(sb, MB_OPTIMIZE_SCAN); + if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) { + if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) + set_opt2(sb, MB_OPTIMIZE_SCAN); + else + clear_opt2(sb, MB_OPTIMIZE_SCAN); + } err = ext4_mb_init(sb); if (err) { diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 982f0170639fc..bf3ba85cf325b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -864,6 +864,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, struct page *cp_page_1 = NULL, *cp_page_2 = NULL; struct f2fs_checkpoint *cp_block = NULL; unsigned long long cur_version = 0, pre_version = 0; + unsigned int cp_blocks; int err; err = get_checkpoint_version(sbi, cp_addr, &cp_block, @@ -871,15 +872,16 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (err) return NULL; - if (le32_to_cpu(cp_block->cp_pack_total_block_count) > - sbi->blocks_per_seg) { + cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count); + + if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) { f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; } pre_version = *version; - cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; + cp_addr += cp_blocks - 1; err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_2, version); if (err) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d0c3aeba59454..3b162506b269a 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -314,10 +314,9 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) } if (ret != PAGE_SIZE << dic->log_cluster_size) { - printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, " + printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, " "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, - dic->rlen, + F2FS_I_SB(dic->inode)->sb->s_id, ret, PAGE_SIZE << dic->log_cluster_size); return -EIO; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8c417864c66ae..bdfa8bed10b2c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3163,8 +3163,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping, /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[DATA]); - else if (atomic_read(&sbi->wb_sync_req[DATA])) + else if (atomic_read(&sbi->wb_sync_req[DATA])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } if (__should_serialize_io(inode, wbc)) { mutex_lock(&sbi->writepages); @@ -3353,7 +3357,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, *fsdata = NULL; - if (len == PAGE_SIZE) + if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode))) goto repeat; ret = f2fs_prepare_compress_overwrite(inode, pagep, diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8c50518475a99..b449c7a372a4b 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,7 +21,7 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static DEFINE_MUTEX(f2fs_stat_mutex); +static DEFINE_RAW_SPINLOCK(f2fs_stat_lock); #ifdef CONFIG_DEBUG_FS static struct dentry *f2fs_debugfs_root; #endif @@ -338,14 +338,16 @@ static char *s_flag[] = { [SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush", 
[SBI_QUOTA_NEED_REPAIR] = " quota_need_repair", [SBI_IS_RESIZEFS] = " resizefs", + [SBI_IS_FREEZING] = " freezefs", }; static int stat_show(struct seq_file *s, void *v) { struct f2fs_stat_info *si; int i = 0, j = 0; + unsigned long flags; - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_for_each_entry(si, &f2fs_stat_list, stat_list) { update_general_status(si->sbi); @@ -573,7 +575,7 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); return 0; } @@ -584,6 +586,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + unsigned long flags; int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); @@ -619,9 +622,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->max_aw_cnt, 0); atomic_set(&sbi->max_vw_cnt, 0); - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_add_tail(&si->stat_list, &f2fs_stat_list); - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); return 0; } @@ -629,10 +632,11 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned long flags; - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_del(&si->stat_list); - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); kfree(si); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 68b44015514f5..2514597f5b26b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1267,6 +1267,7 @@ enum { SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ SBI_IS_RESIZEFS, /* resizefs is in process */ + SBI_IS_FREEZING, /* freezefs is in process */ }; enum { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3c98ef6af97d1..b110c3a7db6ae 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2008,7 +2008,10 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode_lock(inode); - f2fs_disable_compressed_file(inode); + if (!f2fs_disable_compressed_file(inode)) { + ret = -EINVAL; + goto out; + } if (f2fs_is_atomic_file(inode)) { if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ee308a8de4327..e020804f7b075 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1038,8 +1038,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, set_sbi_flag(sbi, SBI_NEED_FSCK); } - if (f2fs_check_nid_range(sbi, dni->ino)) + if (f2fs_check_nid_range(sbi, dni->ino)) { + f2fs_put_page(node_page, 1); return false; + } *nofs = ofs_of_node(node_page); source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0ec8e32a00b47..71f232dcf3c20 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -778,7 +778,8 @@ void f2fs_evict_inode(struct inode *inode) f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); - sb_start_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); retry: @@ -809,7 +810,8 @@ void f2fs_evict_inode(struct inode *inode) if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, 
SBI_QUOTA_NEED_REPAIR); } - sb_end_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); @@ -885,6 +887,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); + set_inode_flag(inode, FI_FREE_NID); f2fs_warn(sbi, "May lose orphan inode, run fsck to fix."); goto out; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 50b2874e758c9..4ff7dfb542502 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2111,8 +2111,12 @@ static int f2fs_write_node_pages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[NODE]); - else if (atomic_read(&sbi->wb_sync_req[NODE])) + else if (atomic_read(&sbi->wb_sync_req[NODE])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } trace_f2fs_writepages(mapping->host, wbc, NODE); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1dabc8244083d..416d802ebbea6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4789,6 +4789,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) sanity_check_seg_type(sbi, curseg->seg_type); + if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) { + f2fs_err(sbi, + "Current segment has invalid alloc_type:%d", + curseg->alloc_type); + return -EFSCORRUPTED; + } + if (f2fs_test_bit(blkofs, se->cur_valid_map)) goto out; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index baefd398ec1a3..c4f8510fac930 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1662,11 +1662,15 @@ static int f2fs_freeze(struct super_block *sb) /* ensure no checkpoint required */ if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list)) return -EINVAL; + + /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */ + set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } static int f2fs_unfreeze(struct super_block *sb) { + clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } @@ -2688,7 +2692,7 @@ int f2fs_quota_sync(struct super_block *sb, int type) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; - int ret; + int ret = 0; /* * Now when everything is written we can discard the pagecache so @@ -2699,8 +2703,8 @@ int f2fs_quota_sync(struct super_block *sb, int type) if (type != -1 && cnt != type) continue; - if (!sb_has_quota_active(sb, type)) - return 0; + if (!sb_has_quota_active(sb, cnt)) + continue; inode_lock(dqopt->files[cnt]); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 8ac5066712454..bdb1b5c05be2e 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -481,7 +481,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a, } else if (t == GC_IDLE_AT) { if (!sbi->am.atgc_enabled) return -EINVAL; - sbi->gc_mode = GC_AT; + sbi->gc_mode = GC_IDLE_AT; } else { sbi->gc_mode = GC_NORMAL; } diff --git a/fs/file.c b/fs/file.c index 97d212a9b8144..ee93173467025 100644 --- a/fs/file.c +++ b/fs/file.c @@ -87,6 +87,21 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); } +/* + * Note how the fdtable bitmap allocations very much have to be a multiple of + * BITS_PER_LONG. This is not only because we walk those things in chunks of + * 'unsigned long' in some places, but simply because that is how the Linux + * kernel bitmaps are defined to work: they are not "bits in an array of bytes", + * they are very much "bits in an array of unsigned long".
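(The fs/file.c comment resumes below. As a quick compilable illustration of the rounding it describes; the ALIGN() definition mirrors the kernel macro for power-of-two alignments:)

    #include <stdio.h>

    #define BITS_PER_LONG (8 * (unsigned int)sizeof(long))
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
    	unsigned int count = 100;	/* open fds, not a multiple of 64 */

    	/* what sane_fdtable_size() now guarantees for the bitmap code */
    	printf("%u -> %u\n", count, ALIGN(count, BITS_PER_LONG));
    	return 0;	/* prints "100 -> 128" on an LP64 machine */
    }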
+ * + * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied + * by that "1024/sizeof(ptr)" before, we already know there are sufficient + * clear low bits. Clang seems to realize that; gcc ends up being confused. + * + * On a 128-bit machine, the ALIGN() would actually matter. In the meantime, + * let's consider it documentation (and maybe a test-case for gcc to improve + * its code generation ;) + */ static struct fdtable * alloc_fdtable(unsigned int nr) { struct fdtable *fdt; @@ -102,6 +117,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr) nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); + nr = ALIGN(nr, BITS_PER_LONG); /* * Note that this can drive nr *below* what we had passed if sysctl_nr_open * had been set lower between the check in expand_files() and here. Deal @@ -269,6 +285,19 @@ static unsigned int count_open_files(struct fdtable *fdt) return i; } +/* + * Note that a sane fdtable size always has to be a multiple of + * BITS_PER_LONG, since we have bitmaps that are sized by this. + * + * 'max_fds' will normally already be properly aligned, but it + * turns out that in the close_range() -> __close_range() -> + * unshare_fd() -> dup_fd() -> sane_fdtable_size() path we can + * end up having a 'max_fds' value that isn't already aligned. + * + * Rather than make close_range() have to worry about this, + * just make that BITS_PER_LONG alignment be part of a sane + * fdtable size. Because that's really what it is. + */ static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) { unsigned int count; @@ -276,7 +305,7 @@ static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) count = count_open_files(fdt); if (max_fds < NR_OPEN_DEFAULT) max_fds = NR_OPEN_DEFAULT; - return min(count, max_fds); + return ALIGN(min(count, max_fds), BITS_PER_LONG); } /* diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 592730fd6e424..e7c0aa6d61ced 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -785,7 +785,8 @@ static int fuse_check_page(struct page *page) 1 << PG_active | 1 << PG_workingset | 1 << PG_reclaim | - 1 << PG_waiters))) { + 1 << PG_waiters | + LRU_GEN_MASK | LRU_REFS_MASK))) { dump_page(page, "fuse: trying to steal weird page"); return 1; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index d67108489148e..fbdb7a30470a3 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2146,7 +2146,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) ret = do_shrink(inode, newsize); out: - gfs2_rs_delete(ip, NULL); + gfs2_rs_delete(ip); gfs2_qa_put(ip); return ret; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 8c39a8571b1fa..b53ad18e5ccbf 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -706,7 +706,7 @@ static int gfs2_release(struct inode *inode, struct file *file) if (file->f_mode & FMODE_WRITE) { if (gfs2_rs_active(&ip->i_res)) - gfs2_rs_delete(ip, &inode->i_writecount); + gfs2_rs_delete(ip); gfs2_qa_put(ip); } return 0; @@ -1083,6 +1083,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, gfs2_holder_uninit(gh); if (statfs_gh) kfree(statfs_gh); + from->count = orig_count - read; return read ?
read : ret; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 89905f4f29bb6..66a123306aecb 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -793,7 +793,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (free_vfs_inode) /* else evict will do the put for us */ gfs2_glock_put(ip->i_gl); } - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_qa_put(ip); fail_free_acls: posix_acl_release(default_acl); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 0fb3c01bc5577..3b34bb24d0af4 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation - * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip) { + struct inode *inode = &ip->i_inode; + down_write(&ip->i_rw_mutex); - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) + if (atomic_read(&inode->i_writecount) <= 1) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); } @@ -1415,7 +1416,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) start = r.start >> bs_shift; end = start + (r.len >> bs_shift); - minlen = max_t(u64, r.minlen, + minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize); + minlen = max_t(u64, minlen, q->limits.discard_granularity) >> bs_shift; if (end <= start || minlen > sdp->sd_max_rg_data) diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3e2ca1fb43056..46dd94e9e085c 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 64c67090f5036..143a47359d1b8 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1396,7 +1396,7 @@ static void gfs2_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); diff --git a/fs/io_uring.c b/fs/io_uring.c index 4715980e90150..5e6788ab188fa 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2813,8 +2813,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req) static bool __io_complete_rw_common(struct io_kiocb *req, long res) { - if (req->rw.kiocb.ki_flags & IOCB_WRITE) + if (req->rw.kiocb.ki_flags & IOCB_WRITE) { kiocb_end_write(req); + fsnotify_modify(req->file); + } else { + fsnotify_access(req->file); + } if (unlikely(res != req->result)) { if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) { @@ -3436,13 +3440,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) ret = nr; break; } + ret += nr; if (!iov_iter_is_bvec(iter)) { iov_iter_advance(iter, nr); } else { - req->rw.len -= nr; req->rw.addr += nr; + req->rw.len -= nr; + if (!req->rw.len) + break; } - ret += nr; if (nr != iovec.iov_len) break; } @@ -4301,6 +4307,8 @@ static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags) 
req->sync.len); if (ret < 0) req_set_fail(req); + else + fsnotify_modify(req->file); io_req_complete(req, ret); return 0; } @@ -5258,8 +5266,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) accept->nofile = rlimit(RLIMIT_NOFILE); accept->file_slot = READ_ONCE(sqe->file_index); - if (accept->file_slot && ((req->open.how.flags & O_CLOEXEC) || - (accept->flags & SOCK_CLOEXEC))) + if (accept->file_slot && (accept->flags & SOCK_CLOEXEC)) return -EINVAL; if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return -EINVAL; @@ -5407,7 +5414,7 @@ struct io_poll_table { }; #define IO_POLL_CANCEL_FLAG BIT(31) -#define IO_POLL_REF_MASK ((1u << 20)-1) +#define IO_POLL_REF_MASK GENMASK(30, 0) /* * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can @@ -5863,6 +5870,7 @@ static __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, list = &ctx->cancel_hash[i]; hlist_for_each_entry_safe(req, tmp, list, hash_node) { if (io_match_task_safe(req, tsk, cancel_all)) { + hlist_del_init(&req->hash_node); io_poll_cancel_req(req); found = true; } @@ -8233,6 +8241,7 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) fput(fpl->fp[i]); } else { kfree_skb(skb); + free_uid(fpl->user); kfree(fpl); } diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 6c51a75d0be61..d020a2e81a24c 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -480,7 +480,8 @@ EXPORT_SYMBOL_GPL(iomap_releasepage); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len) { - trace_iomap_invalidatepage(folio->mapping->host, offset, len); + trace_iomap_invalidatepage(folio->mapping->host, + folio_pos(folio) + offset, len); /* * If we're invalidating the entire folio, clear the dirty state diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8e2f8275a2535..259e00046a8bd 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -842,27 +842,38 @@ EXPORT_SYMBOL(jbd2_journal_restart); */ void jbd2_journal_wait_updates(journal_t *journal) { - transaction_t *commit_transaction = journal->j_running_transaction; + DEFINE_WAIT(wait); - if (!commit_transaction) - return; + while (1) { + /* + * Note that the running transaction can get freed under us if + * this transaction is getting committed in + * jbd2_journal_commit_transaction() -> + * jbd2_journal_free_transaction(). This can only happen when we + * release j_state_lock -> schedule() -> acquire j_state_lock. + * Hence we must re-read the j_running_transaction value every + * time (after each j_state_lock release/acquire cycle), else it + * may lead to a use-after-free of the old, freed transaction.
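(The jbd2 comment closes just below. The discipline it describes, re-read shared state after every unlock/sleep/lock cycle, applies to user-space condition variables as well; a runnable analogue, with invented names:)

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t wait_updates = PTHREAD_COND_INITIALIZER;
    static int *running;	/* may be freed and replaced while we sleep */

    static void wait_for_updates(void)
    {
    	pthread_mutex_lock(&state_lock);
    	for (;;) {
    		int *t = running;	/* re-read after every wakeup */

    		if (!t || *t == 0)
    			break;
    		/* pthread_cond_wait() drops state_lock while sleeping,
    		 * which is exactly why 't' must not be cached */
    		pthread_cond_wait(&wait_updates, &state_lock);
    	}
    	pthread_mutex_unlock(&state_lock);
    }

    int main(void)
    {
    	wait_for_updates();	/* running == NULL: returns immediately */
    	puts("done");
    	return 0;
    }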
+ */ + transaction_t *transaction = journal->j_running_transaction; - spin_lock(&commit_transaction->t_handle_lock); - while (atomic_read(&commit_transaction->t_updates)) { - DEFINE_WAIT(wait); + if (!transaction) + break; + spin_lock(&transaction->t_handle_lock); prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); - if (atomic_read(&commit_transaction->t_updates)) { - spin_unlock(&commit_transaction->t_handle_lock); - write_unlock(&journal->j_state_lock); - schedule(); - write_lock(&journal->j_state_lock); - spin_lock(&commit_transaction->t_handle_lock); + TASK_UNINTERRUPTIBLE); + if (!atomic_read(&transaction->t_updates)) { + spin_unlock(&transaction->t_handle_lock); + finish_wait(&journal->j_wait_updates, &wait); + break; } + spin_unlock(&transaction->t_handle_lock); + write_unlock(&journal->j_state_lock); + schedule(); finish_wait(&journal->j_wait_updates, &wait); + write_lock(&journal->j_state_lock); } - spin_unlock(&commit_transaction->t_handle_lock); } /** @@ -877,8 +888,6 @@ void jbd2_journal_wait_updates(journal_t *journal) */ void jbd2_journal_lock_updates(journal_t *journal) { - DEFINE_WAIT(wait); - jbd2_might_wait_for_commit(journal); write_lock(&journal->j_state_lock); diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index b288c8ae1236b..837cd55fd4c5e 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -415,13 +415,15 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c) jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); ret = -EIO; - goto out_free; + goto out_sum_exit; } jffs2_calc_trigger_levels(c); return 0; + out_sum_exit: + jffs2_sum_exit(c); out_free: kvfree(c->blocks); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 2ac410477c4f4..71f03a5d36ed2 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -603,8 +603,8 @@ int jffs2_do_fill_super(struct super_block *sb, struct fs_context *fc) jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); kvfree(c->blocks); - out_inohash: jffs2_clear_xattr_subsystem(c); + out_inohash: kfree(c->inocache_list); out_wbuf: jffs2_flash_cleanup(c); diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index b676056826beb..29671e33a1714 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -136,7 +136,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) if (!s) { JFFS2_WARNING("Can't allocate memory for summary\n"); ret = -ENOMEM; - goto out; + goto out_buf; } } @@ -275,13 +275,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) } ret = 0; out: + jffs2_sum_reset_collected(s); + kfree(s); + out_buf: if (buf_size) kfree(flashbuf); #ifndef __ECOS else mtd_unpoint(c->mtd, 0, c->mtd->size); #endif - kfree(s); return ret; } diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 91f4ec93dab1f..d8502f4989d9d 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -148,6 +148,7 @@ static const s8 budtab[256] = { * 0 - success * -ENOMEM - insufficient memory * -EIO - i/o error + * -EINVAL - wrong bmap data */ int dbMount(struct inode *ipbmap) { @@ -179,6 +180,12 @@ int dbMount(struct inode *ipbmap) bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); + if (!bmp->db_numag) { + release_metapage(mp); + kfree(bmp); + return -EINVAL; + } + bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c343666d9a428..6464dde03705c 100644 --- a/fs/nfs/callback_proc.c +++ 
b/fs/nfs/callback_proc.c @@ -358,12 +358,11 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, struct cb_process_state *cps) { struct cb_devicenotifyargs *args = argp; + const struct pnfs_layoutdriver_type *ld = NULL; uint32_t i; __be32 res = 0; - struct nfs_client *clp = cps->clp; - struct nfs_server *server = NULL; - if (!clp) { + if (!cps->clp) { res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); goto out; } @@ -371,23 +370,15 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, for (i = 0; i < args->ndevs; i++) { struct cb_devicenotifyitem *dev = &args->devs[i]; - if (!server || - server->pnfs_curr_ld->id != dev->cbd_layout_type) { - rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) - if (server->pnfs_curr_ld && - server->pnfs_curr_ld->id == dev->cbd_layout_type) { - rcu_read_unlock(); - goto found; - } - rcu_read_unlock(); - continue; + if (!ld || ld->id != dev->cbd_layout_type) { + pnfs_put_layoutdriver(ld); + ld = pnfs_find_layoutdriver(dev->cbd_layout_type); + if (!ld) + continue; } - - found: - nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); + nfs4_delete_deviceid(ld, cps->clp, &dev->cbd_dev_id); } - + pnfs_put_layoutdriver(ld); out: kfree(args->devs); return res; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index f90de8043b0f9..8dcb08e1a885d 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -271,10 +271,6 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, n = ntohl(*p++); if (n == 0) goto out; - if (n > ULONG_MAX / sizeof(*args->devs)) { - status = htonl(NFS4ERR_BADXDR); - goto out; - } args->devs = kmalloc_array(n, sizeof(*args->devs), GFP_KERNEL); if (!args->devs) { diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 7fba7711e6b3a..3d5ba43f44bb6 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -949,7 +949,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, error = decode_filename_inline(xdr, &entry->name, &entry->len); if (unlikely(error)) - return error; + return -EAGAIN; /* * The type (size and byte order) of nfscookie isn't defined in diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 9274c9c5efea6..7ab60ad98776f 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1967,7 +1967,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, bool plus) { struct user_namespace *userns = rpc_userns(entry->server->client); - struct nfs_entry old = *entry; __be32 *p; int error; u64 new_cookie; @@ -1987,15 +1986,15 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, error = decode_fileid3(xdr, &entry->ino); if (unlikely(error)) - return error; + return -EAGAIN; error = decode_inline_filename3(xdr, &entry->name, &entry->len); if (unlikely(error)) - return error; + return -EAGAIN; error = decode_cookie3(xdr, &new_cookie); if (unlikely(error)) - return error; + return -EAGAIN; entry->d_type = DT_UNKNOWN; @@ -2003,7 +2002,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, entry->fattr->valid = 0; error = decode_post_op_attr(xdr, entry->fattr, userns); if (unlikely(error)) - return error; + return -EAGAIN; if (entry->fattr->valid & NFS_ATTR_FATTR_V3) entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); @@ -2018,11 +2017,8 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, return -EAGAIN; if (*p != xdr_zero) { error = decode_nfs_fh3(xdr, entry->fh); - if (unlikely(error)) { - if (error == -E2BIG) - goto out_truncated; - return error; - } + if 
(unlikely(error)) + return -EAGAIN; } else zero_nfs_fh3(entry->fh); } @@ -2031,11 +2027,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, entry->cookie = new_cookie; return 0; - -out_truncated: - dprintk("NFS: directory entry contains invalid file handle\n"); - *entry = old; - return -EAGAIN; } /* @@ -2228,6 +2219,7 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr, /* ignore properties */ result->lease_time = 0; result->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; + result->xattr_support = 0; return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0e0db6c276196..c36fa0d0d438b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8333,6 +8333,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) case -NFS4ERR_DEADSESSION: nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); + return; } if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && res->dir != NFS4_CDFS4_BOTH) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ad7f83dc9a2df..815d630802451 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1218,6 +1218,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) do { list_splice_init(&mirror->pg_list, &head); + mirror->pg_recoalesce = 0; while (!list_empty(&head)) { struct nfs_page *req; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7c9090a28e5c3..7ddd003ab8b1a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -92,6 +92,17 @@ find_pnfs_driver(u32 id) return local; } +const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id) +{ + return find_pnfs_driver(id); +} + +void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld) +{ + if (ld) + module_put(ld->owner); +} + void unset_pnfs_layoutdriver(struct nfs_server *nfss) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f4d7548d67b24..07f11489e4e9f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -234,6 +234,8 @@ struct pnfs_devicelist { extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); +extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id); +extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld); /* nfs4proc.c */ extern size_t max_response_pages(struct nfs_server *server); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 73dcaa99fa9ba..e3570c656b0f9 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -92,6 +92,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, info->maxfilesize = 0x7FFFFFFF; info->lease_time = 0; info->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; + info->xattr_support = 0; return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 987a187bd39aa..60693ab6a0325 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -316,7 +316,10 @@ static void nfs_mapping_set_error(struct page *page, int error) struct address_space *mapping = page_file_mapping(page); SetPageError(page); - mapping_set_error(mapping, error); + filemap_set_wb_err(mapping, error); + if (mapping->host) + errseq_set(&mapping->host->i_sb->s_wb_err, + error == -ENOSPC ? 
-ENOSPC : -EIO); nfs_set_pageerror(mapping); } @@ -1409,6 +1412,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, { int priority = flush_task_priority(how); + if (IS_SWAPFILE(hdr->inode)) + task_setup_data->flags |= RPC_TASK_SWAPPER; task_setup_data->priority = priority; rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client); trace_nfs_initiate_write(hdr); diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 8bc807c5fea4c..cc2831cec6695 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -632,7 +632,7 @@ nfsd_file_cache_init(void) if (!nfsd_filecache_wq) goto out; - nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, + nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE, sizeof(*nfsd_file_hashtbl), GFP_KERNEL); if (!nfsd_file_hashtbl) { pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); @@ -700,7 +700,7 @@ nfsd_file_cache_init(void) nfsd_file_slab = NULL; kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kfree(nfsd_file_hashtbl); + kvfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; @@ -811,7 +811,7 @@ nfsd_file_cache_shutdown(void) fsnotify_wait_marks_destroyed(); kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kfree(nfsd_file_hashtbl); + kvfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 32063733443d4..f3b71fd1d1341 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4711,6 +4711,14 @@ nfsd_break_deleg_cb(struct file_lock *fl) return ret; } +/** + * nfsd_breaker_owns_lease - Check if lease conflict was resolved + * @fl: Lock state to check + * + * Return values: + * %true: Lease conflict was resolved + * %false: Lease conflict was not resolved. + */ static bool nfsd_breaker_owns_lease(struct file_lock *fl) { struct nfs4_delegation *dl = fl->fl_owner; @@ -4718,11 +4726,11 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl) struct nfs4_client *clp; if (!i_am_nfsd()) - return NULL; + return false; rqst = kthread_data(current); /* Note rq_prog == NFS_ACL_PROGRAM is also possible: */ if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4) - return NULL; + return false; clp = *(rqst->rq_lease_breaker); return dl->dl_stid.sc_client == clp; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 18b8eb43a19bc..fcdab8a8a41f4 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -230,7 +230,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) unsigned long cnt = argp->len; unsigned int nvecs; - dprintk("nfsd: WRITE %s %d bytes at %d\n", + dprintk("nfsd: WRITE %s %u bytes at %d\n", SVCFH_fmt(&argp->fh), argp->len, argp->offset); diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 528fb299430e6..852f71580bd06 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -32,7 +32,7 @@ struct nfsd_readargs { struct nfsd_writeargs { svc_fh fh; __u32 offset; - int len; + __u32 len; struct xdr_buf payload; }; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 4474adb393ca8..517b71c73aa96 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1881,6 +1881,10 @@ int ntfs_read_inode_mount(struct inode *vi) } /* Now allocate memory for the attribute list. 
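(The ntfs comment closes just below; that hunk, like the jfs dbMount() change earlier, rejects an implausible on-disk value before it reaches an allocator or an array index. The shape of the check as a stand-alone sketch; the helper name and field are invented:)

    #include <stdio.h>
    #include <stdlib.h>

    /* illustrative stand-in for a length field read from disk */
    static void *read_attr_list(unsigned int attr_list_size)
    {
    	if (!attr_list_size)	/* validate before allocating */
    		return NULL;
    	return calloc(1, attr_list_size);
    }

    int main(void)
    {
    	printf("zero size rejected: %s\n",
    	       read_attr_list(0) ? "no" : "yes");
    	return 0;
    }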
*/ ni->attr_list_size = (u32)ntfs_attr_size(a); + if (!ni->attr_list_size) { + ntfs_error(sb, "Attr_list_size is zero"); + goto put_err_out; + } ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); if (!ni->attr_list) { ntfs_error(sb, "Not enough memory to allocate buffer " diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index f033de733adb3..effe92c7d6937 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -337,7 +337,6 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) /* Read information header from global quota file */ int ocfs2_global_read_info(struct super_block *sb, int type) { - struct inode *gqinode = NULL; unsigned int ino[OCFS2_MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, GROUP_QUOTA_SYSTEM_INODE }; struct ocfs2_global_disk_dqinfo dinfo; @@ -346,29 +345,31 @@ int ocfs2_global_read_info(struct super_block *sb, int type) u64 pcount; int status; + oinfo->dqi_gi.dqi_sb = sb; + oinfo->dqi_gi.dqi_type = type; + ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo); + oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk); + oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops; + oinfo->dqi_gqi_bh = NULL; + oinfo->dqi_gqi_count = 0; + /* Read global header */ - gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], + oinfo->dqi_gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], OCFS2_INVALID_SLOT); - if (!gqinode) { + if (!oinfo->dqi_gqinode) { mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n", type); status = -EINVAL; goto out_err; } - oinfo->dqi_gi.dqi_sb = sb; - oinfo->dqi_gi.dqi_type = type; - oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk); - oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops; - oinfo->dqi_gqi_bh = NULL; - oinfo->dqi_gqi_count = 0; - oinfo->dqi_gqinode = gqinode; + status = ocfs2_lock_global_qf(oinfo, 0); if (status < 0) { mlog_errno(status); goto out_err; } - status = ocfs2_extent_map_get_blocks(gqinode, 0, &oinfo->dqi_giblk, + status = ocfs2_extent_map_get_blocks(oinfo->dqi_gqinode, 0, &oinfo->dqi_giblk, &pcount, NULL); if (status < 0) goto out_unlock; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 0e4b16d4c037f..b1a8b046f4c22 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -702,8 +702,6 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) info->dqi_priv = oinfo; oinfo->dqi_type = type; INIT_LIST_HEAD(&oinfo->dqi_chunk); - oinfo->dqi_gqinode = NULL; - ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo); oinfo->dqi_rec = NULL; oinfo->dqi_lqi_bh = NULL; oinfo->dqi_libh = NULL; diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c index 6d8d4bf208377..2e244ada1f970 100644 --- a/fs/proc/bootconfig.c +++ b/fs/proc/bootconfig.c @@ -32,6 +32,8 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size) int ret = 0; key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL); + if (!key) + return -ENOMEM; xbc_for_each_key_value(leaf, val) { ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index f243cb5e6a4fb..e26162f102ffe 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -143,21 +143,22 @@ static void pstore_timer_kick(void) mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms)); } -/* - * Should pstore_dump() wait for a concurrent pstore_dump()? If - * not, the current pstore_dump() will report a failure to dump - * and return. 
- */ -static bool pstore_cannot_wait(enum kmsg_dump_reason reason) +static bool pstore_cannot_block_path(enum kmsg_dump_reason reason) { - /* In NMI path, pstore shouldn't block regardless of reason. */ + /* + * In case of NMI path, pstore shouldn't be blocked + * regardless of reason. + */ if (in_nmi()) return true; switch (reason) { /* In panic case, other cpus are stopped by smp_send_stop(). */ case KMSG_DUMP_PANIC: - /* Emergency restart shouldn't be blocked. */ + /* + * Emergency restart shouldn't be blocked by spinning on + * pstore_info::buf_lock. + */ case KMSG_DUMP_EMERG: return true; default: @@ -389,21 +390,19 @@ static void pstore_dump(struct kmsg_dumper *dumper, unsigned long total = 0; const char *why; unsigned int part = 1; + unsigned long flags = 0; int ret; why = kmsg_dump_reason_str(reason); - if (down_trylock(&psinfo->buf_lock)) { - /* Failed to acquire lock: give up if we cannot wait. */ - if (pstore_cannot_wait(reason)) { - pr_err("dump skipped in %s path: may corrupt error record\n", - in_nmi() ? "NMI" : why); - return; - } - if (down_interruptible(&psinfo->buf_lock)) { - pr_err("could not grab semaphore?!\n"); + if (pstore_cannot_block_path(reason)) { + if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) { + pr_err("dump skipped in %s path because of concurrent dump\n", + in_nmi() ? "NMI" : why); return; } + } else { + spin_lock_irqsave(&psinfo->buf_lock, flags); } kmsg_dump_rewind(&iter); @@ -467,8 +466,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, total += record.size; part++; } - - up(&psinfo->buf_lock); + spin_unlock_irqrestore(&psinfo->buf_lock, flags); } static struct kmsg_dumper pstore_dumper = { @@ -594,7 +592,7 @@ int pstore_register(struct pstore_info *psi) psi->write_user = pstore_write_user_compat; psinfo = psi; mutex_init(&psinfo->read_mutex); - sema_init(&psinfo->buf_lock, 1); + spin_lock_init(&psinfo->buf_lock); if (psi->flags & PSTORE_FLAGS_DMESG) allocate_buf_for_compression(); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index dbe72f664abf3..86151889548e3 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -349,20 +349,97 @@ static int ubifs_create(struct user_namespace *mnt_userns, struct inode *dir, return err; } -static int do_tmpfile(struct inode *dir, struct dentry *dentry, - umode_t mode, struct inode **whiteout) +static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry) +{ + int err; + umode_t mode = S_IFCHR | WHITEOUT_MODE; + struct inode *inode; + struct ubifs_info *c = dir->i_sb->s_fs_info; + struct fscrypt_name nm; + + /* + * Create an inode ('nlink = 1') for the whiteout without updating the + * journal; let ubifs_jnl_rename() store it on flash to complete the + * rename whiteout atomically. + */ + + dbg_gen("dent '%pd', mode %#hx in dir ino %lu", + dentry, mode, dir->i_ino); + + err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); + if (err) + return ERR_PTR(err); + + inode = ubifs_new_inode(c, dir, mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_free; + } + + init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); + ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); + + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) + goto out_inode; + + /* The dir size is updated by do_rename. */
*/ + insert_inode_hash(inode); + + return inode; + +out_inode: + make_bad_inode(inode); + iput(inode); +out_free: + fscrypt_free_filename(&nm); + ubifs_err(c, "cannot create whiteout file, error %d", err); + return ERR_PTR(err); +} + +/** + * lock_2_inodes - a wrapper for locking two UBIFS inodes. + * @inode1: first inode + * @inode2: second inode + * + * We do not implement any tricks to guarantee strict lock ordering, because + * VFS has already done it for us on the @i_mutex. So this is just a simple + * wrapper function. + */ +static void lock_2_inodes(struct inode *inode1, struct inode *inode2) +{ + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); +} + +/** + * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. + * @inode1: first inode + * @inode2: second inode + */ +static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) +{ + mutex_unlock(&ubifs_inode(inode2)->ui_mutex); + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); +} + +static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct inode *inode; struct ubifs_info *c = dir->i_sb->s_fs_info; - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1}; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1}; struct ubifs_budget_req ino_req = { .dirtied_ino = 1 }; - struct ubifs_inode *ui, *dir_ui = ubifs_inode(dir); + struct ubifs_inode *ui; int err, instantiated = 0; struct fscrypt_name nm; /* - * Budget request settings: new dirty inode, new direntry, - * budget for dirtied inode will be released via writeback. + * Budget request settings: new inode, new direntry, changing the + * parent directory inode. + * Allocate budget separately for new dirtied inode, the budget will + * be released via writeback. */ dbg_gen("dent '%pd', mode %#hx in dir ino %lu", @@ -392,42 +469,30 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, } ui = ubifs_inode(inode); - if (whiteout) { - init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); - ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); - } - err = ubifs_init_security(dir, inode, &dentry->d_name); if (err) goto out_inode; mutex_lock(&ui->ui_mutex); insert_inode_hash(inode); - - if (whiteout) { - mark_inode_dirty(inode); - drop_nlink(inode); - *whiteout = inode; - } else { - d_tmpfile(dentry, inode); - } + d_tmpfile(dentry, inode); ubifs_assert(c, ui->dirty); instantiated = 1; mutex_unlock(&ui->ui_mutex); - mutex_lock(&dir_ui->ui_mutex); + lock_2_inodes(dir, inode); err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); if (err) goto out_cancel; - mutex_unlock(&dir_ui->ui_mutex); + unlock_2_inodes(dir, inode); ubifs_release_budget(c, &req); return 0; out_cancel: - mutex_unlock(&dir_ui->ui_mutex); + unlock_2_inodes(dir, inode); out_inode: make_bad_inode(inode); if (!instantiated) @@ -441,12 +506,6 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, return err; } -static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) -{ - return do_tmpfile(dir, dentry, mode, NULL); -} - /** * vfs_dent_type - get VFS directory entry type. * @type: UBIFS directory entry type @@ -660,32 +719,6 @@ static int ubifs_dir_release(struct inode *dir, struct file *file) return 0; } -/** - * lock_2_inodes - a wrapper for locking two UBIFS inodes. 
- * @inode1: first inode - * @inode2: second inode - * - * We do not implement any tricks to guarantee strict lock ordering, because - * VFS has already done it for us on the @i_mutex. So this is just a simple - * wrapper function. - */ -static void lock_2_inodes(struct inode *inode1, struct inode *inode2) -{ - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); - mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); -} - -/** - * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. - * @inode1: first inode - * @inode2: second inode - */ -static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) -{ - mutex_unlock(&ubifs_inode(inode2)->ui_mutex); - mutex_unlock(&ubifs_inode(inode1)->ui_mutex); -} - static int ubifs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { @@ -949,7 +982,8 @@ static int ubifs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_info *c = dir->i_sb->s_fs_info; int err, sz_change; - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1}; struct fscrypt_name nm; /* @@ -1264,17 +1298,19 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, .dirtied_ino = 3 }; struct ubifs_budget_req ino_req = { .dirtied_ino = 1, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; + struct ubifs_budget_req wht_req; struct timespec64 time; unsigned int saved_nlink; struct fscrypt_name old_nm, new_nm; /* - * Budget request settings: deletion direntry, new direntry, removing - * the old inode, and changing old and new parent directory inodes. + * Budget request settings: + * req: deletion direntry, new direntry, removing the old inode, + * and changing old and new parent directory inodes. + * + * wht_req: new whiteout inode for RENAME_WHITEOUT. - * - * However, this operation also marks the target inode as dirty and - * does not write it, so we allocate budget for the target inode - * separately. + * ino_req: marks the target inode as dirty and does not write it. */ dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu flags 0x%x", @@ -1331,20 +1367,44 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_release; } - err = do_tmpfile(old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE, &whiteout); - if (err) { + /* + * The whiteout inode has no dentry and is pinned in memory; + * umount cannot happen during the rename process because we + * hold a reference on the parent dentry. + */ + whiteout = create_whiteout(old_dir, old_dentry); + if (IS_ERR(whiteout)) { + err = PTR_ERR(whiteout); kfree(dev); goto out_release; } - spin_lock(&whiteout->i_lock); - whiteout->i_state |= I_LINKABLE; - spin_unlock(&whiteout->i_lock); - whiteout_ui = ubifs_inode(whiteout); whiteout_ui->data = dev; whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0)); ubifs_assert(c, !whiteout_ui->dirty); + + memset(&wht_req, 0, sizeof(struct ubifs_budget_req)); + wht_req.new_ino = 1; + wht_req.new_ino_d = ALIGN(whiteout_ui->data_len, 8); + /* + * To avoid a deadlock between the space budget (which holds + * ui_mutex and waits for the wb work) and the writeback work + * (which waits for ui_mutex), reserve the space budget before + * the ubifs inodes are locked. + */ + err = ubifs_budget_space(c, &wht_req); + if (err) { + /* + * The whiteout inode cannot be written on flash by + * ubifs_jnl_write_inode(), because it's neither + * dirty nor zero-nlink. + */
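(Reserving the whiteout budget before lock_4_inodes() is the point of this hunk: ubifs_budget_space() may sleep and kick writeback, and writeback itself takes ui_mutex. A compile-and-run sketch of that ordering with stubbed helpers; all names are illustrative, not UBIFS APIs:)

    #include <stdio.h>

    static int reserve_budget(unsigned int bytes)	/* may sleep, may kick writeback */
    {
    	return bytes ? 0 : -1;
    }
    static void lock_inodes(void)   { /* writeback also takes these locks */ }
    static void unlock_inodes(void) { }

    static int rename_with_whiteout(unsigned int wht_bytes)
    {
    	int err = reserve_budget(wht_bytes);	/* strictly before locking */

    	if (err)
    		return err;		/* nothing locked yet: easy unwind */
    	lock_inodes();
    	/* ... journal the rename together with the whiteout inode ... */
    	unlock_inodes();
    	return 0;
    }

    int main(void)
    {
    	printf("%d\n", rename_with_whiteout(128));
    	return 0;
    }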
+ iput(whiteout); + goto out_release; + } + + /* Add the old_dentry size to the old_dir size. */ + old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); + } lock_4_inodes(old_dir, new_dir, new_inode, whiteout); @@ -1416,29 +1476,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); if (unlink && IS_SYNC(new_inode)) sync = 1; - } - - if (whiteout) { - struct ubifs_budget_req wht_req = { .dirtied_ino = 1, - .dirtied_ino_d = \ - ALIGN(ubifs_inode(whiteout)->data_len, 8) }; - - err = ubifs_budget_space(c, &wht_req); - if (err) { - kfree(whiteout_ui->data); - whiteout_ui->data_len = 0; - iput(whiteout); - goto out_release; - } - - inc_nlink(whiteout); - mark_inode_dirty(whiteout); - - spin_lock(&whiteout->i_lock); - whiteout->i_state &= ~I_LINKABLE; - spin_unlock(&whiteout->i_lock); - - iput(whiteout); + /* + * The S_SYNC flag of the whiteout is inherited from the + * old_dir, and we have already checked the old dir inode. So + * there is no need to check the whiteout. + */ } err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir, @@ -1449,6 +1491,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); ubifs_release_budget(c, &req); + if (whiteout) { + ubifs_release_budget(c, &wht_req); + iput(whiteout); + } + mutex_lock(&old_inode_ui->ui_mutex); release = old_inode_ui->dirty; mark_inode_dirty_sync(old_inode); @@ -1457,11 +1504,16 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, if (release) ubifs_release_budget(c, &ino_req); if (IS_SYNC(old_inode)) - err = old_inode->i_sb->s_op->write_inode(old_inode, NULL); + /* + * The rename has finished here. Although the old inode cannot + * be updated on flash, a stale ctime is not a big problem; + * don't return an error code to userspace.
+ */ + old_inode->i_sb->s_op->write_inode(old_inode, NULL); fscrypt_free_filename(&old_nm); fscrypt_free_filename(&new_nm); - return err; + return 0; out_cancel: if (unlink) { @@ -1482,11 +1534,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, inc_nlink(old_dir); } } + unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); if (whiteout) { - drop_nlink(whiteout); + ubifs_release_budget(c, &wht_req); iput(whiteout); } - unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); out_release: ubifs_release_budget(c, &ino_req); ubifs_release_budget(c, &req); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 5cfa28cd00cdc..6b45a037a0471 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -570,7 +570,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, } if (!PagePrivate(page)) { - SetPagePrivate(page); + attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); __set_page_dirty_nobuffers(page); } @@ -947,7 +947,7 @@ static int do_writepage(struct page *page, int len) release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); kunmap(page); @@ -1304,7 +1304,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset, release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); } @@ -1471,8 +1471,8 @@ static int ubifs_migrate_page(struct address_space *mapping, return rc; if (PagePrivate(page)) { - ClearPagePrivate(page); - SetPagePrivate(newpage); + detach_page_private(page); + attach_page_private(newpage, (void *)1); } if (mode != MIGRATE_SYNC_NO_COPY) @@ -1496,7 +1496,7 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) return 0; ubifs_assert(c, PagePrivate(page)); ubifs_assert(c, 0); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); return 1; } @@ -1567,7 +1567,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) else { if (!PageChecked(page)) ubifs_convert_page_budget(c); - SetPagePrivate(page); + attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); __set_page_dirty_nobuffers(page); } diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 789a7813f3fa2..1607a3c76681a 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -854,16 +854,42 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ n = aligned_len >> c->max_write_shift; if (n) { - n <<= c->max_write_shift; + int m = n - 1; + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, buf + written, - wbuf->offs, n); + + if (m) { + /* '(n-1) << c->max_write_shift < len' is always true. */ + m <<= c->max_write_shift; + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, m); + if (err) + goto out; + wbuf->offs += m; + aligned_len -= m; + len -= m; + written += m; + } + + /* + * The non-written part of buf may be less than 'n' bytes + * because parameter 'len' is not 8-byte aligned, so here we + * copy min(len, n) bytes from buf.
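(The io.c comment closes just below. The split performed by the new write path, (n - 1) full max-write units taken straight from the caller's buffer and then one staged, possibly padded unit, in plain arithmetic; the shift value is illustrative:)

    #include <stdio.h>

    #define MAX_WRITE_SHIFT 11	/* 2 KiB max_write_size, illustrative */

    int main(void)
    {
    	int len = 2045;				/* unaligned payload */
    	int aligned_len = (len + 7) & ~7;	/* 2048 */
    	int unit = 1 << MAX_WRITE_SHIFT;
    	int n = aligned_len >> MAX_WRITE_SHIFT;	/* full units: 1 */
    	int m = n - 1;				/* direct writes: 0 */
    	int staged = len - (m << MAX_WRITE_SHIFT);

    	printf("direct: %d bytes\n", m << MAX_WRITE_SHIFT);
    	if (staged < unit)	/* the n > len case: pad the tail */
    		printf("staged: %d payload + %d padding\n",
    		       staged, unit - staged);
    	return 0;
    }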
+ */ + n = 1 << c->max_write_shift; + memcpy(wbuf->buf, buf + written, min(len, n)); + if (n > len) { + ubifs_assert(c, n - len < 8); + ubifs_pad(c, wbuf->buf + len, n - len); + } + + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); if (err) goto out; wbuf->offs += n; aligned_len -= n; - len -= n; + len -= min(len, n); written += n; } diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index c6a8634877803..71bcebe45f9c5 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -108,7 +108,7 @@ static int setflags(struct inode *inode, int flags) struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_budget_req req = { .dirtied_ino = 1, - .dirtied_ino_d = ui->data_len }; + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; err = ubifs_budget_space(c, &req); if (err) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 8ea680dba61e3..75dab0ae3939d 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1207,9 +1207,9 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir, * @sync: non-zero if the write-buffer has to be synchronized * * This function implements the re-name operation which may involve writing up - * to 4 inodes and 2 directory entries. It marks the written inodes as clean - * and returns zero on success. In case of failure, a negative error code is - * returned. + * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes) + * and 2 directory entries. It marks the written inodes as clean and returns + * zero on success. In case of failure, a negative error code is returned. */ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct inode *old_inode, @@ -1222,14 +1222,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, void *p; union ubifs_key key; struct ubifs_dent_node *dent, *dent2; - int err, dlen1, dlen2, ilen, lnum, offs, len, orphan_added = 0; + int err, dlen1, dlen2, ilen, wlen, lnum, offs, len, orphan_added = 0; int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ; int last_reference = !!(new_inode && new_inode->i_nlink == 0); int move = (old_dir != new_dir); - struct ubifs_inode *new_ui; + struct ubifs_inode *new_ui, *whiteout_ui; u8 hash_old_dir[UBIFS_HASH_ARR_SZ]; u8 hash_new_dir[UBIFS_HASH_ARR_SZ]; u8 hash_new_inode[UBIFS_HASH_ARR_SZ]; + u8 hash_whiteout_inode[UBIFS_HASH_ARR_SZ]; u8 hash_dent1[UBIFS_HASH_ARR_SZ]; u8 hash_dent2[UBIFS_HASH_ARR_SZ]; @@ -1249,9 +1250,20 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, } else ilen = 0; + if (whiteout) { + whiteout_ui = ubifs_inode(whiteout); + ubifs_assert(c, mutex_is_locked(&whiteout_ui->ui_mutex)); + ubifs_assert(c, whiteout->i_nlink == 1); + ubifs_assert(c, !whiteout_ui->dirty); + wlen = UBIFS_INO_NODE_SZ; + wlen += whiteout_ui->data_len; + } else + wlen = 0; + aligned_dlen1 = ALIGN(dlen1, 8); aligned_dlen2 = ALIGN(dlen2, 8); - len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8); + len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + + ALIGN(wlen, 8) + ALIGN(plen, 8); if (move) len += plen; @@ -1313,6 +1325,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, p += ALIGN(ilen, 8); } + if (whiteout) { + pack_inode(c, p, whiteout, 0); + err = ubifs_node_calc_hash(c, p, hash_whiteout_inode); + if (err) + goto out_release; + + p += ALIGN(wlen, 8); + } + if (!move) { pack_inode(c, p, old_dir, 1); err = ubifs_node_calc_hash(c, p, hash_old_dir); @@ -1352,6 +1373,9 @@ int ubifs_jnl_rename(struct 
ubifs_info *c, const struct inode *old_dir, if (new_inode) ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, new_inode->i_ino); + if (whiteout) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, + whiteout->i_ino); } release_head(c, BASEHD); @@ -1368,8 +1392,6 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, old_nm); if (err) goto out_ro; - - ubifs_delete_orphan(c, whiteout->i_ino); } else { err = ubifs_add_dirt(c, lnum, dlen2); if (err) @@ -1390,6 +1412,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, offs += ALIGN(ilen, 8); } + if (whiteout) { + ino_key_init(c, &key, whiteout->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, wlen, + hash_whiteout_inode); + if (err) + goto out_ro; + offs += ALIGN(wlen, 8); + } + ino_key_init(c, &key, old_dir->i_ino); err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_old_dir); if (err) @@ -1410,6 +1441,11 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, new_ui->synced_i_size = new_ui->ui_size; spin_unlock(&new_ui->ui_lock); } + /* + * No need to mark whiteout inode clean. + * Whiteout doesn't have non-zero size, no need to update + * synced_i_size for whiteout_ui. + */ mark_inode_clean(c, ubifs_inode(old_dir)); if (move) mark_inode_clean(c, ubifs_inode(new_dir)); diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index af5ad51d3eef8..b12ae9bdebf43 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -283,4 +283,11 @@ enum drbg_prefixes { DRBG_PREFIX3 }; +extern int drbg_alloc_state(struct drbg_state *drbg); +extern void drbg_dealloc_state(struct drbg_state *drbg); +extern void drbg_convert_tfm_core(const char *cra_driver_name, int *coreref, + bool *pr); +extern const struct drbg_core drbg_cores[]; +extern unsigned short drbg_sec_strength(drbg_flag_t flags); + #endif /* _DRBG_H */ diff --git a/crypto/jitterentropy.h b/include/crypto/internal/jitterentropy.h similarity index 100% rename from crypto/jitterentropy.h rename to include/crypto/internal/jitterentropy.h diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index b501d0badaea2..5f88e484515a1 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -592,10 +592,16 @@ struct drm_display_info { bool rgb_quant_range_selectable; /** - * @edid_hdmi_dc_modes: Mask of supported hdmi deep color modes. Even - * more stuff redundant with @bus_formats. + * @edid_hdmi_rgb444_dc_modes: Mask of supported hdmi deep color modes + * in RGB 4:4:4. Even more stuff redundant with @bus_formats. */ - u8 edid_hdmi_dc_modes; + u8 edid_hdmi_rgb444_dc_modes; + + /** + * @edid_hdmi_ycbcr444_dc_modes: Mask of supported hdmi deep color + * modes in YCbCr 4:4:4. Even more stuff redundant with @bus_formats. + */ + u8 edid_hdmi_ycbcr444_dc_modes; /** * @cea_rev: CEA revision of the HDMI sink. 
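The drm_connector.h hunk above splits the old edid_hdmi_dc_modes mask into separate RGB 4:4:4 and YCbCr 4:4:4 masks, so a driver now has to consult the mask that matches its output format when picking a deep color depth. A minimal sketch of that lookup (the helper is hypothetical; it assumes the DRM_EDID_HDMI_DC_{30,36,48} bit definitions from drm_edid.h apply to both masks)::

	/* Hypothetical helper: deepest color depth the sink advertises
	 * for the chosen 4:4:4 output format, in bits per component. */
	static int sink_max_bpc(const struct drm_display_info *info,
				bool ycbcr444)
	{
		u8 dc = ycbcr444 ? info->edid_hdmi_ycbcr444_dc_modes
				 : info->edid_hdmi_rgb444_dc_modes;

		if (dc & DRM_EDID_HDMI_DC_48)
			return 16;
		if (dc & DRM_EDID_HDMI_DC_36)
			return 12;
		if (dc & DRM_EDID_HDMI_DC_30)
			return 10;
		return 8;	/* no deep color advertised */
	}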
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 30359e434c3f3..fdf3cf6ccc021 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -456,7 +456,7 @@ struct drm_panel; #define DP_FEC_CAPABILITY_1 0x091 /* 2.0 */ /* DP-HDMI2.1 PCON DSC ENCODER SUPPORT */ -#define DP_PCON_DSC_ENCODER_CAP_SIZE 0xC /* 0x9E - 0x92 */ +#define DP_PCON_DSC_ENCODER_CAP_SIZE 0xD /* 0x92 through 0x9E */ #define DP_PCON_DSC_ENCODER 0x092 # define DP_PCON_DSC_ENCODER_SUPPORTED (1 << 0) # define DP_PCON_DSC_PPS_ENC_OVERRIDE (1 << 1) @@ -1528,8 +1528,6 @@ u8 drm_dp_get_adjust_request_pre_emphasis(const u8 link_status[DP_LINK_STATUS_SI int lane); u8 drm_dp_get_adjust_tx_ffe_preset(const u8 link_status[DP_LINK_STATUS_SIZE], int lane); -u8 drm_dp_get_adjust_request_post_cursor(const u8 link_status[DP_LINK_STATUS_SIZE], - unsigned int lane); #define DP_BRANCH_OUI_HEADER_SIZE 0xc #define DP_RECEIVER_CAP_SIZE 0xf diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h index b84693fbd2b50..ec4f543c3d950 100644 --- a/include/drm/drm_modeset_lock.h +++ b/include/drm/drm_modeset_lock.h @@ -34,6 +34,7 @@ struct drm_modeset_lock; * struct drm_modeset_acquire_ctx - locking context (see ww_acquire_ctx) * @ww_ctx: base acquire ctx * @contended: used internally for -EDEADLK handling + * @stack_depot: used internally for contention debugging * @locked: list of held locks * @trylock_only: trylock mode used in atomic contexts/panic notifiers * @interruptible: whether interruptible locking should be used. diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index a3dba31df01e9..6db58d1808665 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -151,7 +151,16 @@ static __always_inline int arch_atomic_read_acquire(const atomic_t *v) { - return smp_load_acquire(&(v)->counter); + int ret; + + if (__native_word(atomic_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic_read_acquire arch_atomic_read_acquire #endif @@ -160,7 +169,12 @@ arch_atomic_read_acquire(const atomic_t *v) static __always_inline void arch_atomic_set_release(atomic_t *v, int i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic_set(v, i); + } } #define arch_atomic_set_release arch_atomic_set_release #endif @@ -1258,7 +1272,16 @@ arch_atomic_dec_if_positive(atomic_t *v) static __always_inline s64 arch_atomic64_read_acquire(const atomic64_t *v) { - return smp_load_acquire(&(v)->counter); + s64 ret; + + if (__native_word(atomic64_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_atomic64_read(v); + __atomic_acquire_fence(); + } + + return ret; } #define arch_atomic64_read_acquire arch_atomic64_read_acquire #endif @@ -1267,7 +1290,12 @@ arch_atomic64_read_acquire(const atomic64_t *v) static __always_inline void arch_atomic64_set_release(atomic64_t *v, s64 i) { - smp_store_release(&(v)->counter, i); + if (__native_word(atomic64_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_atomic64_set(v, i); + } } #define arch_atomic64_set_release arch_atomic64_set_release #endif @@ -2358,4 +2386,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// cca554917d7ea73d5e3e7397dd70c484cad9b2c4 +// 
8e2cc06bc0d2c0967d2f8424762bd48555ee40ae diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 049cf9421d831..f821b72433613 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -87,6 +87,9 @@ struct coredump_params { loff_t written; loff_t pos; loff_t to_skip; + int vma_count; + size_t vma_data_size; + struct core_vma_metadata *vma_meta; }; /* diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index b4de2010fba55..bc5c04d711bbc 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -24,6 +24,7 @@ #include #include #include +#include /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) @@ -604,6 +605,21 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg) atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); } +/** + * blk_cgroup_mergeable - Determine whether to allow or disallow merges + * @rq: request to merge into + * @bio: bio to merge + * + * @bio and @rq should belong to the same cgroup and their issue_as_root should + * match. The latter is necessary as we don't want to throttle e.g. a metadata + * update because it happens to be next to a regular IO. + */ +static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) +{ + return rq->bio->bi_blkg == bio->bi_blkg && + bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio); +} + void blk_cgroup_bio_start(struct bio *bio); void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); @@ -659,6 +675,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) { } static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline void blk_cgroup_bio_start(struct bio *bio) { } +static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; } #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fe065c394fff6..86c0f85df8bb4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -317,7 +317,8 @@ enum { BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ - BIO_TRACKED, /* set if bio goes through the rq_qos path */ + BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ + BIO_QOS_MERGED, /* but went through rq_qos merge path */ BIO_REMAPPED, BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 75c151413fda8..b145025f3eaca 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +extern struct mutex cgroup_mutex; + +static inline void cgroup_lock(void) +{ + mutex_lock(&cgroup_mutex); +} + +static inline void cgroup_unlock(void) +{ + mutex_unlock(&cgroup_mutex); +} + /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for @@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgroup *cgrp) * as locks used during the cgroup_subsys::attach() methods. 
*/ #ifdef CONFIG_PROVE_RCU -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ @@ -707,6 +718,8 @@ struct cgroup; static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} +static inline void cgroup_lock(void) {} +static inline void cgroup_unlock(void) {} static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) { return 0; } static inline int cgroupstats_build(struct cgroupstats *stats, diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 248a68c668b45..aa12ec94fae28 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -12,6 +12,8 @@ struct core_vma_metadata { unsigned long start, end; unsigned long flags; unsigned long dump_size; + unsigned long pgoff; + struct file *file; }; /* @@ -25,9 +27,6 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); -int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, - struct core_vma_metadata **vma_meta, - size_t *vma_data_size_ptr); extern void do_coredump(const kernel_siginfo_t *siginfo); #else static inline void do_coredump(const kernel_siginfo_t *siginfo) {} diff --git a/include/linux/fb.h b/include/linux/fb.h index 02f362c661c80..3d7306c9a7065 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -502,6 +502,7 @@ struct fb_info { } *apertures; bool skip_vt_switch; /* no VT switch on suspend/resume required */ + bool forced_out; /* set when being removed by another driver */ }; static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9367f1cb2e3c4..f40754caaefa4 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -579,7 +579,16 @@ enum NR_SOFTIRQS }; -#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) +/* + * The following vectors can be safely ignored after ksoftirqd is parked: + * + * _ RCU: + * 1) rcutree_migrate_callbacks() migrates the queue. + * 2) rcu_report_dead() reports the final quiescent states. + * + * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue + */ +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. 
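The interrupt.h hunk above inverts the old SOFTIRQ_STOP_IDLE_MASK convention: rather than naming the vectors to skip, SOFTIRQ_HOTPLUG_SAFE_MASK names the only vectors that may legitimately remain pending once ksoftirqd is parked, for the reasons listed in the comment. An illustrative sanity check built on the new mask (not part of the patch)::

	/* Warn if a CPU going offline still has softirqs pending that
	 * nobody is left to handle. */
	static void verify_softirqs_parked(void)
	{
		unsigned long pending = local_softirq_pending();

		pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK;
		if (pending)
			pr_warn("pending softirqs on offline CPU: %#lx\n",
				pending);
	}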
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 93d270ca0c567..fbec0c019e3b1 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc) int handle_irq_desc(struct irq_desc *desc); int generic_handle_irq(unsigned int irq); +int generic_dispatch_irq(unsigned int irq); #ifdef CONFIG_IRQ_DOMAIN /* diff --git a/include/linux/ksm.h b/include/linux/ksm.h index a38a5bca1ba58..f85a184bf6203 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -19,6 +19,10 @@ struct stable_node; struct mem_cgroup; #ifdef CONFIG_KSM +int ksm_madvise_merge(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *vm_flags); +int ksm_madvise_unmerge(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long *vm_flags); int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); int __ksm_enter(struct mm_struct *mm); diff --git a/include/linux/lrng.h b/include/linux/lrng.h new file mode 100644 index 0000000000000..3e8f93b53c849 --- /dev/null +++ b/include/linux/lrng.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2018 - 2021, Stephan Mueller + */ + +#ifndef _LRNG_H +#define _LRNG_H + +#include +#include +#include + +/* + * struct lrng_crypto_cb - cryptographic callback functions + * @lrng_drng_name Name of DRNG + * @lrng_hash_name Name of Hash used for reading entropy pool + * @lrng_drng_alloc: Allocate DRNG -- the provided integer should be + * used for sanity checks. + * return: allocated data structure or PTR_ERR on + * error + * @lrng_drng_dealloc: Deallocate DRNG + * @lrng_drng_seed_helper: Seed the DRNG with data of arbitrary length + * drng: is pointer to data structure allocated + * with lrng_drng_alloc + * return: >= 0 on success, < 0 on error + * @lrng_drng_generate_helper: Generate random numbers from the DRNG with + * arbitrary length + * @lrng_hash_alloc: Allocate the hash for reading the entropy pool + * return: allocated data structure (NULL is + * success too) or ERR_PTR on error + * @lrng_hash_dealloc: Deallocate Hash + * @lrng_hash_digestsize: Return the digestsize for the used hash to read + * out entropy pool + * hash: is pointer to data structure allocated + * with lrng_hash_alloc + * return: size of digest of hash in bytes + * @lrng_hash_init: Initialize hash + * hash: is pointer to data structure allocated + * with lrng_hash_alloc + * return: 0 on success, < 0 on error + * @lrng_hash_update: Update hash operation + * hash: is pointer to data structure allocated + * with lrng_hash_alloc + * return: 0 on success, < 0 on error + * @lrng_hash_final Final hash operation + * hash: is pointer to data structure allocated + * with lrng_hash_alloc + * return: 0 on success, < 0 on error + * @lrng_hash_desc_zero Zeroization of hash state buffer + * + * Assumptions: + * + * 1. Hash operation will not sleep + * 2. The hash' volatile state information is provided with *shash by caller. 
+ */ +struct lrng_crypto_cb { + const char *(*lrng_drng_name)(void); + const char *(*lrng_hash_name)(void); + void *(*lrng_drng_alloc)(u32 sec_strength); + void (*lrng_drng_dealloc)(void *drng); + int (*lrng_drng_seed_helper)(void *drng, const u8 *inbuf, u32 inbuflen); + int (*lrng_drng_generate_helper)(void *drng, u8 *outbuf, u32 outbuflen); + void *(*lrng_hash_alloc)(void); + void (*lrng_hash_dealloc)(void *hash); + u32 (*lrng_hash_digestsize)(void *hash); + int (*lrng_hash_init)(struct shash_desc *shash, void *hash); + int (*lrng_hash_update)(struct shash_desc *shash, const u8 *inbuf, + u32 inbuflen); + int (*lrng_hash_final)(struct shash_desc *shash, u8 *digest); + void (*lrng_hash_desc_zero)(struct shash_desc *shash); +}; + +/* Register cryptographic backend */ +#ifdef CONFIG_LRNG_DRNG_SWITCH +int lrng_set_drng_cb(const struct lrng_crypto_cb *cb); +#else /* CONFIG_LRNG_DRNG_SWITCH */ +static inline int +lrng_set_drng_cb(const struct lrng_crypto_cb *cb) { return -EOPNOTSUPP; } +#endif /* CONFIG_LRNG_DRNG_SWITCH */ + +#endif /* _LRNG_H */ diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 819ec92dc2a82..db924fe379c9c 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -332,6 +332,8 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname, struct sockaddr *address, int addrlen) LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc, struct sock *sk, struct sock *newsk) +LSM_HOOK(int, 0, sctp_assoc_established, struct sctp_association *asoc, + struct sk_buff *skb) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3bf5c658bc448..419b5febc3ca5 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1046,6 +1046,11 @@ * @asoc pointer to current sctp association structure. * @sk pointer to current sock structure. * @newsk pointer to new sock structure. + * @sctp_assoc_established: + * Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet + * to the security module. + * @asoc pointer to sctp association structure. + * @skb pointer to skbuff of association packet. * * Security hooks for Infiniband * diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0abbd685703b9..b8e5718665b89 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -343,6 +343,11 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif +#ifdef CONFIG_LRU_GEN + /* per-memcg mm_struct list */ + struct lru_gen_mm_list mm_list; +#endif + struct mem_cgroup_per_node *nodeinfo[]; }; @@ -437,6 +442,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio) * - LRU isolation * - lock_page_memcg() * - exclusive reference + * - mem_cgroup_trylock_pages() * * For a kmem folio a caller should hold an rcu read lock to protect memcg * associated with a kmem folio from being released. @@ -498,6 +504,7 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) * - LRU isolation * - lock_page_memcg() * - exclusive reference + * - mem_cgroup_trylock_pages() * * For a kmem page a caller should hold an rcu read lock to protect memcg * associated with a kmem page from being released. 
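The memcontrol.h hunk that follows adds mem_cgroup_trylock_pages() and mem_cgroup_unlock_pages(), which stabilize folio_memcg() for a whole batch of pages under a single RCU read-side section instead of taking lock_page_memcg() per page. A sketch of the intended calling pattern (the walker is hypothetical; only the lock/unlock pair comes from the hunk)::

	static void scan_one_memcg(struct mem_cgroup *memcg)
	{
		/* Fails only while the memcg is moving accounts; the
		 * caller just retries this batch later. */
		if (!mem_cgroup_trylock_pages(memcg))
			return;

		/* ... folio_memcg() is now stable for folios charged to
		 * this memcg, so they can be classified locklessly ... */

		mem_cgroup_unlock_pages();
	}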
@@ -935,6 +942,23 @@ void unlock_page_memcg(struct page *page); void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); +/* try to stabilize folio_memcg() for all the pages in a memcg */ +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + rcu_read_lock(); + + if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account)) + return true; + + rcu_read_unlock(); + return false; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) @@ -1372,6 +1396,18 @@ static inline void folio_memcg_unlock(struct folio *folio) { } +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + /* to match folio_memcg_rcu() */ + rcu_read_lock(); + return true; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + static inline void mem_cgroup_handle_over_high(void) { } diff --git a/include/linux/migrate.h b/include/linux/migrate.h index db96e10eb8da2..90e75d5a54d66 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -48,7 +48,15 @@ int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); extern bool numa_demotion_enabled; +extern void migrate_on_reclaim_init(void); +#ifdef CONFIG_HOTPLUG_CPU +extern void set_migration_target_nodes(void); #else +static inline void set_migration_target_nodes(void) {} +#endif +#else + +static inline void set_migration_target_nodes(void) {} static inline void putback_movable_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t new, diff --git a/include/linux/mm.h b/include/linux/mm.h index 5744a3fc47169..1f3695e959429 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -227,6 +227,7 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +#define lru_to_folio(head) (list_entry((head)->prev, struct folio, lru)) void setup_initial_init_mm(void *start_code, void *end_code, void *end_data, void *brk); @@ -1032,6 +1033,8 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) #define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) +#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH) +#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH) /* * Define the bit shifts to access each section.
For non-existent @@ -1585,6 +1588,11 @@ static inline unsigned long folio_pfn(struct folio *folio) return page_to_pfn(&folio->page); } +static inline struct folio *pfn_folio(unsigned long pfn) +{ + return page_folio(pfn_to_page(pfn)); +} + /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION static inline bool is_pinnable_page(struct page *page) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index cf90b1fa2c60c..1c8d617e73a9c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -32,15 +32,25 @@ static inline int page_is_file_lru(struct page *page) return folio_is_file_lru(page_folio(page)); } -static __always_inline void update_lru_size(struct lruvec *lruvec, +static __always_inline void __update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, long nr_pages) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); + lockdep_assert_held(&lruvec->lru_lock); + WARN_ON_ONCE(nr_pages != (int)nr_pages); + __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); __mod_zone_page_state(&pgdat->node_zones[zid], NR_ZONE_LRU_BASE + lru, nr_pages); +} + +static __always_inline void update_lru_size(struct lruvec *lruvec, + enum lru_list lru, enum zone_type zid, + int nr_pages) +{ + __update_lru_size(lruvec, lru, zid, nr_pages); #ifdef CONFIG_MEMCG mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages); #endif @@ -92,11 +102,210 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio) return lru; } +#ifdef CONFIG_LRU_GEN + +static inline bool lru_gen_enabled(void) +{ +#ifdef CONFIG_LRU_GEN_ENABLED + DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]); +#else + DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]); +#endif +} + +static inline bool lru_gen_in_fault(void) +{ + return current->in_lru_fault; +} + +static inline int lru_gen_from_seq(unsigned long seq) +{ + return seq % MAX_NR_GENS; +} + +static inline int lru_hist_from_seq(unsigned long seq) +{ + return seq % NR_HIST_GENS; +} + +static inline int lru_tier_from_refs(int refs) +{ + VM_BUG_ON(refs > BIT(LRU_REFS_WIDTH)); + + /* see the comment on MAX_NR_TIERS */ + return order_base_2(refs + 1); +} + +static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) +{ + unsigned long max_seq = lruvec->lrugen.max_seq; + + VM_BUG_ON(gen >= MAX_NR_GENS); + + /* see the comment on MIN_NR_GENS */ + return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1); +} + +static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *folio, + int old_gen, int new_gen) +{ + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + int delta = folio_nr_pages(folio); + enum lru_list lru = type * LRU_INACTIVE_FILE; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON(old_gen != -1 && old_gen >= MAX_NR_GENS); + VM_BUG_ON(new_gen != -1 && new_gen >= MAX_NR_GENS); + VM_BUG_ON(old_gen == -1 && new_gen == -1); + + if (old_gen >= 0) + WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone], + lrugen->nr_pages[old_gen][type][zone] - delta); + if (new_gen >= 0) + WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone], + lrugen->nr_pages[new_gen][type][zone] + delta); + + /* addition */ + if (old_gen < 0) { + if (lru_gen_is_active(lruvec, new_gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, delta); + return; + } + + /* deletion */ + if (new_gen < 0) { + if 
(lru_gen_is_active(lruvec, old_gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, -delta); + return; + } + + /* promotion */ + if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) { + __update_lru_size(lruvec, lru, zone, -delta); + __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta); + } + + /* demotion requires isolation, e.g., lru_deactivate_fn() */ + VM_BUG_ON(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); +} + +static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + int gen; + unsigned long old_flags, new_flags; + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + if (folio_test_unevictable(folio) || !lrugen->enabled) + return false; + /* + * There are three common cases for this page: + * 1. If it's hot, e.g., freshly faulted in or previously hot and + * migrated, add it to the youngest generation. + * 2. If it's cold but can't be evicted immediately, i.e., an anon page + * not in swapcache or a dirty page pending writeback, add it to the + * second oldest generation. + * 3. Everything else (clean, cold) is added to the oldest generation. + */ + if (folio_test_active(folio)) + gen = lru_gen_from_seq(lrugen->max_seq); + else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || + (folio_test_reclaim(folio) && + (folio_test_dirty(folio) || folio_test_writeback(folio)))) + gen = lru_gen_from_seq(lrugen->min_seq[type] + 1); + else + gen = lru_gen_from_seq(lrugen->min_seq[type]); + + do { + new_flags = old_flags = READ_ONCE(folio->flags); + VM_BUG_ON_FOLIO(new_flags & LRU_GEN_MASK, folio); + + /* see the comment on MIN_NR_GENS */ + new_flags &= ~(LRU_GEN_MASK | BIT(PG_active)); + new_flags |= (gen + 1UL) << LRU_GEN_PGOFF; + } while (cmpxchg(&folio->flags, old_flags, new_flags) != old_flags); + + lru_gen_update_size(lruvec, folio, -1, gen); + /* for folio_rotate_reclaimable() */ + if (reclaiming) + list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]); + else + list_add(&folio->lru, &lrugen->lists[gen][type][zone]); + + return true; +} + +static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + int gen; + unsigned long old_flags, new_flags; + + do { + new_flags = old_flags = READ_ONCE(folio->flags); + if (!(new_flags & LRU_GEN_MASK)) + return false; + + VM_BUG_ON_FOLIO(folio_test_active(folio), folio); + VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); + + gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; + + new_flags &= ~LRU_GEN_MASK; + if ((new_flags & LRU_REFS_FLAGS) != LRU_REFS_FLAGS) + new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS); + /* for shrink_page_list() */ + if (reclaiming) + new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim)); + else if (lru_gen_is_active(lruvec, gen)) + new_flags |= BIT(PG_active); + } while (cmpxchg(&folio->flags, old_flags, new_flags) != old_flags); + + lru_gen_update_size(lruvec, folio, gen, -1); + list_del(&folio->lru); + + return true; +} + +#else + +static inline bool lru_gen_enabled(void) +{ + return false; +} + +static inline bool lru_gen_in_fault(void) +{ + return false; +} + +static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + return false; +} + +static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + return false; +} + +#endif /* CONFIG_LRU_GEN */ + static __always_inline void 
lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); + if (lru_gen_add_folio(lruvec, folio, false)) + return; + update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); list_add(&folio->lru, &lruvec->lists[lru]); @@ -113,6 +322,9 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) { enum lru_list lru = folio_lru_list(folio); + if (lru_gen_add_folio(lruvec, folio, true)) + return; + update_lru_size(lruvec, lru, folio_zonenum(folio), folio_nr_pages(folio)); list_add_tail(&folio->lru, &lruvec->lists[lru]); @@ -127,6 +339,9 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page, static __always_inline void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) { + if (lru_gen_del_folio(lruvec, folio, false)) + return; + list_del(&folio->lru); update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio), -folio_nr_pages(folio)); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0f549870da6a0..cbc7fa381ac6f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -3,6 +3,7 @@ #define _LINUX_MM_TYPES_H #include +#include #include #include @@ -17,6 +18,8 @@ #include #include #include +#include +#include #include @@ -637,6 +640,22 @@ struct mm_struct { #ifdef CONFIG_IOMMU_SUPPORT u32 pasid; #endif +#ifdef CONFIG_LRU_GEN + struct { + /* this mm_struct is on lru_gen_mm_list */ + struct list_head list; +#ifdef CONFIG_MEMCG + /* points to the memcg of "owner" above */ + struct mem_cgroup *memcg; +#endif + /* + * Set when switching to this mm_struct, as a hint of + * whether it has been used since the last time per-node + * page table walkers cleared the corresponding bits. + */ + nodemask_t nodes; + } lru_gen; +#endif /* CONFIG_LRU_GEN */ } __randomize_layout; /* @@ -663,6 +682,65 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return (struct cpumask *)&mm->cpu_bitmap; } +#ifdef CONFIG_LRU_GEN + +struct lru_gen_mm_list { + /* mm_struct list for page table walkers */ + struct list_head fifo; + /* protects the list above */ + spinlock_t lock; +}; + +void lru_gen_add_mm(struct mm_struct *mm); +void lru_gen_del_mm(struct mm_struct *mm); +#ifdef CONFIG_MEMCG +void lru_gen_migrate_mm(struct mm_struct *mm); +#endif + +static inline void lru_gen_init_mm(struct mm_struct *mm) +{ + INIT_LIST_HEAD(&mm->lru_gen.list); +#ifdef CONFIG_MEMCG + mm->lru_gen.memcg = NULL; +#endif + nodes_clear(mm->lru_gen.nodes); +} + +static inline void lru_gen_use_mm(struct mm_struct *mm) +{ + /* unlikely but not a bug when racing with lru_gen_migrate_mm() */ + VM_WARN_ON(list_empty(&mm->lru_gen.list)); + + if (!(current->flags & PF_KTHREAD) && !nodes_full(mm->lru_gen.nodes)) + nodes_setall(mm->lru_gen.nodes); +} + +#else /* !CONFIG_LRU_GEN */ + +static inline void lru_gen_add_mm(struct mm_struct *mm) +{ +} + +static inline void lru_gen_del_mm(struct mm_struct *mm) +{ +} + +#ifdef CONFIG_MEMCG +static inline void lru_gen_migrate_mm(struct mm_struct *mm) +{ +} +#endif + +static inline void lru_gen_init_mm(struct mm_struct *mm) +{ +} + +static inline void lru_gen_use_mm(struct mm_struct *mm) +{ +} + +#endif /* CONFIG_LRU_GEN */ + struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index aed44e9b5d899..f98f9ce50e67f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ 
-303,6 +303,207 @@ enum lruvec_flags { */ }; +#endif /* !__GENERATING_BOUNDS_H */ + +/* + * Evictable pages are divided into multiple generations. The youngest and the + * oldest generation numbers, max_seq and min_seq, are monotonically increasing. + * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An + * offset within MAX_NR_GENS, gen, indexes the LRU list of the corresponding + * generation. The gen counter in folio->flags stores gen+1 while a page is on + * one of lrugen->lists[]. Otherwise it stores 0. + * + * A page is added to the youngest generation on faulting. The aging needs to + * check the accessed bit at least twice before handing this page over to the + * eviction. The first check takes care of the accessed bit set on the initial + * fault; the second check makes sure this page hasn't been used since then. + * This process, AKA second chance, requires a minimum of two generations, + * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive + * LRU, these two generations are considered active; the rest of the generations, + * if they exist, are considered inactive. See lru_gen_is_active(). PG_active is + * always cleared while a page is on one of lrugen->lists[] so that the aging + * does not need to worry about it. And it's set again when a page considered + * active is isolated for non-reclaiming purposes, e.g., migration. See + * lru_gen_add_folio() and lru_gen_del_folio(). + * + * MAX_NR_GENS is set to 4 so that the multi-gen LRU has twice the number of + * categories of the active/inactive LRU. + * + */ +#define MIN_NR_GENS 2U +#define MAX_NR_GENS 4U + +/* + * Each generation is divided into multiple tiers. Tiers represent different + * ranges of numbers of accesses through file descriptors. A page accessed N + * times through file descriptors is in tier order_base_2(N). A page in the + * first tier (N=0,1) is marked by PG_referenced unless it was faulted in + * through page tables or read ahead. A page in any other tier (N>1) is marked + * by PG_referenced and PG_workingset. Two additional bits in folio->flags are + * required to support four tiers. + * + * In contrast to moving across generations, which requires the LRU lock, moving + * across tiers only requires operations on folio->flags and therefore has a + * negligible cost in the buffered access path. In the eviction path, + * comparisons of refaulted/(evicted+protected) from the first tier and the + * rest infer whether pages accessed multiple times through file descriptors + * are statistically hot and thus worth protecting. + * + * MAX_NR_TIERS is set to 4 so that the multi-gen LRU has twice the number of + * categories of the active/inactive LRU when tracking accesses through file + * descriptors.
+ */ +#define MAX_NR_TIERS 4U +#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) + +#ifndef __GENERATING_BOUNDS_H + +struct lruvec; +struct page_vma_mapped_walk; + +#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) +#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) + +#ifdef CONFIG_LRU_GEN + +enum { + LRU_GEN_ANON, + LRU_GEN_FILE, +}; + +enum { + LRU_GEN_CORE, + LRU_GEN_MM_WALK, + LRU_GEN_NONLEAF_YOUNG, + NR_LRU_GEN_CAPS +}; + +#define MIN_LRU_BATCH BITS_PER_LONG +#define MAX_LRU_BATCH (MIN_LRU_BATCH * 128) + +/* whether to keep historical stats from evicted generations */ +#ifdef CONFIG_LRU_GEN_STATS +#define NR_HIST_GENS MAX_NR_GENS +#else +#define NR_HIST_GENS 1U +#endif + +/* + * The youngest generation number is stored in max_seq for both anon and file + * types as they are aged on an equal footing. The oldest generation numbers are + * stored in min_seq[] separately for anon and file types as clean file pages + * can be evicted regardless of swap constraints. + * + * Normally anon and file min_seq are in sync. But if swapping is constrained, + * e.g., out of swap space, file min_seq is allowed to advance and leave anon + * min_seq behind. + */ +struct lru_gen_struct { + /* the aging increments the youngest generation number */ + unsigned long max_seq; + /* the eviction increments the oldest generation numbers */ + unsigned long min_seq[ANON_AND_FILE]; + /* the birth time of each generation in jiffies */ + unsigned long timestamps[MAX_NR_GENS]; + /* the multi-gen LRU lists */ + struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the sizes of the above lists */ + unsigned long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the exponential moving average of refaulted */ + unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; + /* the exponential moving average of evicted+protected */ + unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; + /* the first tier doesn't need protection, hence the minus one */ + unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; + /* can be modified without holding the LRU lock */ + atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; + atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; + /* whether the multi-gen LRU is enabled */ + bool enabled; +}; + +enum { + MM_PTE_TOTAL, /* total leaf entries */ + MM_PTE_OLD, /* old leaf entries */ + MM_PTE_YOUNG, /* young leaf entries */ + MM_PMD_TOTAL, /* total non-leaf entries */ + MM_PMD_FOUND, /* non-leaf entries found in Bloom filters */ + MM_PMD_ADDED, /* non-leaf entries added to Bloom filters */ + NR_MM_STATS +}; + +/* mnemonic codes for the mm stats above */ +#define MM_STAT_CODES "toydfa" + +/* double-buffering Bloom filters */ +#define NR_BLOOM_FILTERS 2 + +struct lru_gen_mm_state { + /* set to max_seq after each iteration */ + unsigned long seq; + /* where the current iteration starts (inclusive) */ + struct list_head *head; + /* where the last iteration ends (exclusive) */ + struct list_head *tail; + /* to wait for the last page table walker to finish */ + struct wait_queue_head wait; + /* Bloom filters flip after each iteration */ + unsigned long *filters[NR_BLOOM_FILTERS]; + /* the mm stats for debugging */ + unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; + /* the number of concurrent page table walkers */ + int nr_walkers; +}; + +struct lru_gen_mm_walk { + /* the lruvec under reclaim */ + struct lruvec *lruvec; + /* unstable max_seq from lru_gen_struct */ + unsigned long 
max_seq; + /* the next address within an mm to scan */ + unsigned long next_addr; + /* to batch page table entries */ + unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)]; + /* to batch promoted pages */ + int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* to batch the mm stats */ + int mm_stats[NR_MM_STATS]; + /* total batched items */ + int batched; + bool can_swap; + bool full_scan; +}; + +void lru_gen_init_lruvec(struct lruvec *lruvec); +void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); + +#ifdef CONFIG_MEMCG +void lru_gen_init_memcg(struct mem_cgroup *memcg); +void lru_gen_exit_memcg(struct mem_cgroup *memcg); +#endif + +#else /* !CONFIG_LRU_GEN */ + +static inline void lru_gen_init_lruvec(struct lruvec *lruvec) +{ +} + +static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +{ +} + +#ifdef CONFIG_MEMCG +static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) +{ +} + +static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) +{ +} +#endif + +#endif /* CONFIG_LRU_GEN */ + struct lruvec { struct list_head lists[NR_LRU_LISTS]; /* per lruvec lru_lock for memcg */ @@ -320,6 +521,12 @@ struct lruvec { unsigned long refaults[ANON_AND_FILE]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; +#ifdef CONFIG_LRU_GEN + /* evictable pages divided into generations */ + struct lru_gen_struct lrugen; + /* to concurrently iterate lru_gen_mm_list */ + struct lru_gen_mm_state mm_state; +#endif #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif @@ -911,6 +1118,10 @@ typedef struct pglist_data { unsigned long flags; +#ifdef CONFIG_LRU_GEN + /* kswap mm walk data */ + struct lru_gen_mm_walk mm_walk; +#endif ZONE_PADDING(_pad2_) /* Per-node vmstats */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 5b88cd51fadb5..dcf90144d70b7 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1240,6 +1240,7 @@ struct nand_secure_region { * @lock: Lock protecting the suspended field. Also used to serialize accesses * to the NAND device * @suspended: Set to 1 when the device is suspended, 0 when it's not + * @resume_wq: wait queue to sleep if rawnand is in suspended state. * @cur_cs: Currently selected target. -1 means no target selected, otherwise we * should always have cur_cs >= 0 && cur_cs < nanddev_ntargets(). 
* NAND Controller drivers should not modify this value, but they're @@ -1294,6 +1295,7 @@ struct nand_chip { /* Internals */ struct mutex lock; unsigned int suspended : 1; + wait_queue_head_t resume_wq; int cur_cs; int read_retries; struct nand_secure_region *secure_regions; diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index e6487a6911360..8676316547cc4 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -99,7 +99,7 @@ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, return skb; nf_hook_state_init(&state, NF_NETDEV_EGRESS, - NFPROTO_NETDEV, dev, NULL, NULL, + NFPROTO_NETDEV, NULL, dev, NULL, dev_net(dev), NULL); /* nf assumes rcu_read_lock, not just read_lock_bh */ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 567c3ddba2c42..90840c459abcc 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -486,6 +486,7 @@ static inline int num_node_state(enum node_states state) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) +#define next_memory_node(nid) (MAX_NUMNODES) #define nr_node_ids 1U #define nr_online_nodes 1U diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 855dd9b3e84be..a662435c9b6f1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -337,6 +337,7 @@ enum { NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index ef1e3e736e148..c1946cdb845fe 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -55,7 +55,8 @@ #define SECTIONS_WIDTH 0 #endif -#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \ + <= BITS_PER_LONG - NR_PAGEFLAGS #define NODES_WIDTH NODES_SHIFT #elif defined(CONFIG_SPARSEMEM_VMEMMAP) #error "Vmemmap: No space for nodes field in page flags" @@ -89,8 +90,8 @@ #define LAST_CPUPID_SHIFT 0 #endif -#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \ - <= BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 @@ -100,8 +101,8 @@ #define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif -#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \ - > BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS #error "Not enough bits in page flags" #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1c3b6e5c8bfd3..a95518ca98eb7 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -935,7 +935,7 @@ __PAGEFLAG(Isolated, isolated, PF_ANY); 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ 1UL << PG_slab | 1UL << PG_active | \ - 1UL << PG_unevictable | __PG_MLOCKED) + 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) /* * Flags checked when a page is prepped for return by the page 
allocator. @@ -946,7 +946,7 @@ __PAGEFLAG(Isolated, isolated, PF_ANY); * alloc-free cycle to prevent from reusing the page. */ #define PAGE_FLAGS_CHECK_AT_PREP \ - (PAGEFLAGS_MASK & ~__PG_HWPOISON) + ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) diff --git a/include/linux/pci.h b/include/linux/pci.h index 8253a5413d7c4..678fecdf6b812 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -668,6 +668,7 @@ struct pci_bus { struct bin_attribute *legacy_io; /* Legacy I/O for this bus */ struct bin_attribute *legacy_mem; /* Legacy mem */ unsigned int is_added:1; + unsigned int unsafe_warn:1; /* warned about RW1C config write */ }; #define to_pci_bus(n) container_of(n, struct pci_bus, dev) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index f4f4077b97aab..743e7fc4afda3 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -212,7 +212,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) @@ -233,7 +233,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, BUILD_BUG(); return 0; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH @@ -259,6 +259,19 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_pte_young +/* + * Return whether the accessed bit is supported on the local CPU. + * + * This stub assumes accessing through an old PTE triggers a page fault. + * Architectures that automatically set the access bit should overwrite it. + */ +static inline bool arch_has_hw_pte_young(void) +{ + return false; +} +#endif + #ifndef __HAVE_ARCH_PTEP_CLEAR static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/include/linux/pstore.h b/include/linux/pstore.h index eb93a54cff31f..e97a8188f0fd8 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include @@ -87,7 +87,7 @@ struct pstore_record { * @owner: module which is responsible for this backend driver * @name: name of the backend driver * - * @buf_lock: semaphore to serialize access to @buf + * @buf_lock: spinlock to serialize access to @buf * @buf: preallocated crash dump buffer * @bufsize: size of @buf available for crash dump bytes (must match * smallest number of bytes available for writing to a @@ -178,7 +178,7 @@ struct pstore_info { struct module *owner; const char *name; - struct semaphore buf_lock; + spinlock_t buf_lock; char *buf; size_t bufsize; diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index bebc911161b6f..d373f1bcbf7ca 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -16,8 +16,20 @@ DECLARE_PER_CPU(u32, kstack_offset); * alignment. Also, since this use is being explicitly masked to a max of * 10 bits, stack-clash style attacks are unlikely. 
For more details see * "VLAs" in Documentation/process/deprecated.rst + * + * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently + * only with Clang and not GCC). Initializing the unused area on each syscall + * entry is expensive, and generating an implicit call to memset() may also be + * problematic (such as in noinstr functions). Therefore, if the compiler + * supports it (which it should if it initializes allocas), always use the + * "uninitialized" variant of the builtin. */ -void *__builtin_alloca(size_t size); +#if __has_builtin(__builtin_alloca_uninitialized) +#define __kstack_alloca __builtin_alloca_uninitialized +#else +#define __kstack_alloca __builtin_alloca +#endif + /* * Use, at most, 10 bits of entropy. We explicitly cap this to keep the * "VLA" from being unbounded (see above). 10 bits leaves enough room for @@ -36,7 +48,7 @@ void *__builtin_alloca(size_t size); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \ + u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \ /* Keep allocation even after "ptr" loses scope. */ \ asm volatile("" :: "r"(ptr) : "memory"); \ } \ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 858f4d429946d..5fed476f977f6 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void) rcu_tasks_qs(current, (preempt)); \ } while (0) -static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) +static inline int rcu_needs_cpu(void) { - *nextevt = KTIME_MAX; return 0; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 53209d6694001..6cc91291d0782 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -19,7 +19,7 @@ void rcu_softirq_qs(void); void rcu_note_context_switch(bool preempt); -int rcu_needs_cpu(u64 basem, u64 *nextevt); +int rcu_needs_cpu(void); void rcu_cpu_stall_reset(void); /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 75ba8aa60248b..be23cc9e6edde 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -914,6 +914,10 @@ struct task_struct { #ifdef CONFIG_MEMCG unsigned in_user_fault:1; #endif +#ifdef CONFIG_LRU_GEN + /* whether the LRU algorithm may apply to this access */ + unsigned in_lru_fault:1; +#endif #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif @@ -1630,6 +1634,14 @@ static inline unsigned int task_state_index(struct task_struct *tsk) if (tsk_state == TASK_IDLE) state = TASK_REPORT_IDLE; + /* + * We're lying here, but rather than expose a completely new task state + * to userspace, we can make this appear as if the task has gone through + * a regular rt_mutex_lock() call. 
+ */ + if (tsk_state == TASK_RTLOCK_WAIT) + state = TASK_UNINTERRUPTIBLE; + return fls(state); } diff --git a/include/linux/security.h b/include/linux/security.h index 6d72772182c82..25b3ef71f495e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1422,6 +1422,8 @@ int security_sctp_bind_connect(struct sock *sk, int optname, struct sockaddr *address, int addrlen); void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, struct sock *newsk); +int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, @@ -1641,6 +1643,12 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *newsk) { } + +static inline int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index c58cc142d23f4..8c32935e1059d 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -458,6 +458,8 @@ extern void uart_handle_cts_change(struct uart_port *uport, extern void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, unsigned int ch, unsigned int flag); +void uart_xchar_out(struct uart_port *uport, int offset); + #ifdef CONFIG_MAGIC_SYSRQ_SERIAL #define SYSRQ_TIMEOUT (HZ * 5) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8a636e678902d..42f885f0ce8af 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1475,6 +1475,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = offset; +} #else static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@ -1485,6 +1490,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end - skb->head; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = skb->head + offset; +} #endif /* Internal */ @@ -1724,19 +1734,19 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) return 0; } -/* This variant of skb_unclone() makes sure skb->truesize is not changed */ +/* This variant of skb_unclone() makes sure skb->truesize + * and skb_end_offset() are not changed, whenever a new skb->head is needed. + * + * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X)) + * when various debugging features are in place. 
+ */ +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri); static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); - if (skb_cloned(skb)) { - unsigned int save = skb->truesize; - int res; - - res = pskb_expand_head(skb, 0, 0, pri); - skb->truesize = save; - return res; - } + if (skb_cloned(skb)) + return __skb_unclone_keeptruesize(skb, pri); return 0; } diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 18a717fe62eb0..7f32dd59e7513 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -310,21 +310,16 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } -static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg) -{ - if (msg->skb) - sock_drop(psock->sk, msg->skb); - kfree(msg); -} - static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) list_add_tail(&msg->list, &psock->ingress_msg); - else - drop_sk_msg(psock, msg); + else { + sk_msg_free(psock->sk, msg); + kfree(msg); + } spin_unlock_bh(&psock->ingress_lock); } diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 0aad7009b50e6..bd0d11af76c5e 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -645,7 +645,7 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, static inline struct ti_sci_resource * devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, - u32 dev_id, u32 sub_type); + u32 dev_id, u32 sub_type) { return ERR_PTR(-EINVAL); } diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b519609af1d02..4417f667c757e 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -731,6 +731,8 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; + if (len > SIZE_MAX / sizeof(*p)) + return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) return -EBADMSG; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 955ea4d7af0b2..eef5e87c03b43 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -139,6 +139,9 @@ struct rpc_xprt_ops { void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); + int (*get_srcaddr)(struct rpc_xprt *xprt, char *buf, + size_t buflen); + unsigned short (*get_srcport)(struct rpc_xprt *xprt); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); void (*prepare_request)(struct rpc_rqst *req); diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 8c2a712cb2420..fed813ffe7db1 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -10,7 +10,6 @@ int init_socket_xprt(void); void cleanup_socket_xprt(void); -unsigned short get_srcport(struct rpc_xprt *); #define RPC_MIN_RESVPORT (1U) #define RPC_MAX_RESVPORT (65535U) @@ -89,5 +88,6 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_WRITE (5) #define XPRT_SOCK_WAKE_PENDING (6) #define XPRT_SOCK_WAKE_DISCONNECT (7) +#define XPRT_SOCK_CONNECT_SENT (8) #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 1d38d9475c4d0..04d84ac6d1ac4 100644 --- a/include/linux/swap.h +++ 
b/include/linux/swap.h
@@ -137,6 +137,10 @@ union swap_header {
  */
 struct reclaim_state {
 	unsigned long reclaimed_slab;
+#ifdef CONFIG_LRU_GEN
+	/* per-thread mm walk data */
+	struct lru_gen_mm_walk *mm_walk;
+#endif
 };
 
 #ifdef __KERNEL__
@@ -372,6 +376,7 @@ extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_cpu_zone(struct zone *zone);
 extern void lru_add_drain_all(void);
+extern void folio_activate(struct folio *folio);
 extern void deactivate_file_page(struct page *page);
 extern void deactivate_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 819c0cb00b6d3..0cd9c82d6c22d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -921,6 +921,7 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
 asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
 		size_t vlen, int behavior, unsigned int flags);
 asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags);
+asmlinkage long sys_pmadv_ksm(int pidfd, int behavior, unsigned int flags);
 asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 		unsigned long prot, unsigned long pgoff,
 		unsigned long flags);
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 78b91bb92f0d5..618568b01a80d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -225,7 +225,8 @@ struct tcp_sock {
 	u8	compressed_ack;
 	u8	dup_ack_counter:2,
 		tlp_retrans:1,	/* TLP is a retransmission */
-		unused:5;
+		fast_ack_mode:2, /* which fast ack mode ? */
+		unused:3;
 	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */
 	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */
 	u8	chrono_type:2,	/* current chronograph type */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 33a4240e6a6f1..82213f9c4c17f 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -139,6 +139,8 @@ static inline void set_rlimit_ucount_max(struct user_namespace *ns,
 
 #ifdef CONFIG_USER_NS
 
+extern int unprivileged_userns_clone;
+
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 {
 	if (ns)
@@ -172,6 +174,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
 struct ns_common *ns_get_owner(struct ns_common *ns);
 #else
 
+#define unprivileged_userns_clone 0
+
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 {
 	return &init_user_ns;
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index d6d5da6ed7354..66e28bc1a023f 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -9,6 +9,7 @@
  * See Documentation/core-api/xarray.rst for how to use the XArray.
  */
 
+#include <linux/bitmap.h>
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/gfp.h>
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 35c073d44ec5a..5cb095b09a940 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -255,6 +255,16 @@ enum {
 	 * during the hdev->setup vendor callback.
 	 */
 	HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER,
+
+	/* When this quirk is set, HCI_OP_SET_EVENT_FLT requests with
+	 * HCI_FLT_CLEAR_ALL are ignored and event filtering is
+	 * completely avoided. A subset of the CSR controller
+	 * clones struggle with this and instantly lock up.
+	 *
+	 * Note that devices using this must (separately) disable
+	 * runtime suspend, because event filtering takes place there.
+	 */
+	HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL,
 };
 
 /* HCI device flags */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 4ad47d9f9d271..7623bb1f89c06 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -134,7 +134,8 @@ struct inet_connection_sock {
 	u32			  icsk_probes_tstamp;
 	u32			  icsk_user_timeout;
 
-	u64			  icsk_ca_priv[104 / sizeof(u64)];
+/* XXX inflated by temporary internal debugging info */
+	u64			  icsk_ca_priv[216 / sizeof(u64)];
 #define ICSK_CA_PRIV_SIZE	  sizeof_field(struct inet_connection_sock, icsk_ca_priv)
 };
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 37f0fbefb060f..9939c366f720d 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -177,4 +177,5 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat);
 int nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum);
 void nf_nat_helper_put(struct nf_conntrack_helper *helper);
+void nf_ct_set_auto_assign_helper_warned(struct net *net);
 #endif /*_NF_CONNTRACK_HELPER_H*/
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index bd59e950f4d67..64daafd1fc41c 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -10,6 +10,8 @@
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/flow_offload.h>
 #include <net/dst.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
 
 struct nf_flowtable;
 struct nf_flow_rule;
@@ -317,4 +319,20 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
 int nf_flow_table_offload_init(void);
 void nf_flow_table_offload_exit(void);
 
+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+{
+	__be16 proto;
+
+	proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+			     sizeof(struct pppoe_hdr)));
+	switch (proto) {
+	case htons(PPP_IP):
+		return htons(ETH_P_IP);
+	case htons(PPP_IPV6):
+		return htons(ETH_P_IPV6);
+	}
+
+	return 0;
+}
+
 #endif /* _NF_FLOW_TABLE_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b9fc978fb2cad..e72896801fdf3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -370,6 +370,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
 #define	TCP_ECN_QUEUE_CWR	2
 #define	TCP_ECN_DEMAND_CWR	4
 #define	TCP_ECN_SEEN		8
+#define	TCP_ECN_ECT_PERMANENT	16
 
 enum tcp_tw_status {
 	TCP_TW_SUCCESS = 0,
@@ -808,6 +809,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
 	return max_t(s64, t1 - t0, 0);
 }
 
+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0)
+{
+	return max_t(s32, t1 - t0, 0);
+}
+
 static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
 {
 	return tcp_ns_to_ts(skb->skb_mstamp_ns);
@@ -883,9 +889,14 @@ struct tcp_skb_cb {
 			/* pkts S/ACKed so far upon tx of skb, incl retrans: */
 			__u32 delivered;
 			/* start of send pipeline phase */
-			u64 first_tx_mstamp;
+			u32 first_tx_mstamp;
 			/* when we reached the "delivered" count */
-			u64 delivered_mstamp;
+			u32 delivered_mstamp;
+#define TCPCB_IN_FLIGHT_BITS 20
+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
+			u32 in_flight:20,   /* packets in flight at transmit */
+			    unused2:12;
+			u32 lost;	/* packets lost so far upon tx of skb */
 		} tx;	/* only used for outgoing skbs */
 		union {
 			struct inet_skb_parm	h4;
@@ -1011,7 +1022,11 @@ enum tcp_ca_ack_event_flags {
 #define TCP_CONG_NON_RESTRICTED 0x1
 /* Requires ECN/ECT set on all packets */
 #define TCP_CONG_NEEDS_ECN	0x2
-#define TCP_CONG_MASK	(TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
+/* Wants notification of CE events
(CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */ +#define TCP_CONG_WANTS_CE_EVENTS 0x4 +#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \ + TCP_CONG_NEEDS_ECN | \ + TCP_CONG_WANTS_CE_EVENTS) union tcp_cc_info; @@ -1031,8 +1046,11 @@ struct ack_sample { */ struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ + u32 prior_lost; /* tp->lost at "prior_mstamp" */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ + u32 tx_in_flight; /* packets in flight at starting timestamp */ + s32 lost; /* number of packets lost over interval */ s32 delivered; /* number of packets delivered over interval */ s32 delivered_ce; /* number of packets delivered w/ CE marks*/ long interval_us; /* time for tp->delivered to incr "delivered" */ @@ -1045,6 +1063,7 @@ struct rate_sample { bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ + bool is_ece; /* did this ACK have ECN marked? */ }; struct tcp_congestion_ops { @@ -1068,8 +1087,11 @@ struct tcp_congestion_ops { /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* override sysctl_tcp_min_tso_segs */ - u32 (*min_tso_segs)(struct sock *sk); + /* pick target number of segments per TSO/GSO skb (optional): */ + u32 (*tso_segs)(struct sock *sk, unsigned int mss_now); + + /* react to a specific lost skb (optional) */ + void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb); /* call when packets are delivered to update cwnd and pacing rate, * after all the ca_state processing. (optional) @@ -1132,6 +1154,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) } #endif +static inline bool tcp_ca_wants_ce_events(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN | + TCP_CONG_WANTS_CE_EVENTS); +} + static inline bool tcp_ca_needs_ecn(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1157,6 +1187,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) } /* From tcp_rate.c */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 647c53b261051..57e3e239a1fce 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -206,6 +206,7 @@ struct scsi_device { unsigned rpm_autosuspend:1; /* Enable runtime autosuspend at device * creation time */ unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */ + unsigned silence_suspend:1; /* Do not print runtime PM related messages */ unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h index 089a760d36eb7..6fb2d5e378fdd 100644 --- a/include/sound/intel-nhlt.h +++ b/include/sound/intel-nhlt.h @@ -18,6 +18,13 @@ enum nhlt_link_type { NHLT_LINK_INVALID }; +enum nhlt_device_type { + NHLT_DEVICE_BT = 0, + NHLT_DEVICE_DMIC = 1, + NHLT_DEVICE_I2S = 4, + NHLT_DEVICE_INVALID +}; + #if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_INTEL_NHLT) struct wav_fmt { @@ -41,13 +48,6 @@ struct 
wav_fmt_ext { u8 sub_fmt[16]; } __packed; -enum nhlt_device_type { - NHLT_DEVICE_BT = 0, - NHLT_DEVICE_DMIC = 1, - NHLT_DEVICE_I2S = 4, - NHLT_DEVICE_INVALID -}; - struct nhlt_specific_cfg { u32 size; u8 caps[]; @@ -133,6 +133,9 @@ void intel_nhlt_free(struct nhlt_acpi_table *addr); int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt); bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, u8 link_type); + +int intel_nhlt_ssp_endpoint_mask(struct nhlt_acpi_table *nhlt, u8 device_type); + struct nhlt_specific_cfg * intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, u32 bus_id, u8 link_type, u8 vbps, u8 bps, @@ -163,6 +166,11 @@ static inline bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, return false; } +static inline int intel_nhlt_ssp_endpoint_mask(struct nhlt_acpi_table *nhlt, u8 device_type) +{ + return 0; +} + static inline struct nhlt_specific_cfg * intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, u32 bus_id, u8 link_type, u8 vbps, u8 bps, diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 36da42cd07748..6b99310b5b889 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -401,6 +401,8 @@ struct snd_pcm_runtime { wait_queue_head_t tsleep; /* transfer sleep */ struct fasync_struct *fasync; bool stop_operating; /* sync_stop will be called */ + struct mutex buffer_mutex; /* protect for buffer changes */ + atomic_t buffer_accessing; /* >0: in r/w operation, <0: blocked */ /* -- private section -- */ void *private_data; diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 19e957b7f9410..1a0b7030f72a3 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -95,6 +95,17 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) +TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_NOMEM); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_SWAP_BOOT); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA); +TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX); + #define show_fc_reason(reason) \ __print_symbolic(reason, \ { EXT4_FC_REASON_XATTR, "XATTR"}, \ @@ -2723,41 +2734,50 @@ TRACE_EVENT(ext4_fc_commit_stop, #define FC_REASON_NAME_STAT(reason) \ show_fc_reason(reason), \ - __entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason] + __entry->fc_ineligible_rc[reason] TRACE_EVENT(ext4_fc_stats, - TP_PROTO(struct super_block *sb), - - TP_ARGS(sb), + TP_PROTO(struct super_block *sb), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(struct ext4_sb_info *, sbi) - __field(int, count) - ), + TP_ARGS(sb), - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->sbi = EXT4_SB(sb); - ), + TP_STRUCT__entry( + __field(dev_t, dev) + __array(unsigned int, fc_ineligible_rc, EXT4_FC_REASON_MAX) + __field(unsigned long, fc_commits) + __field(unsigned long, fc_ineligible_commits) + __field(unsigned long, fc_numblks) + ), - TP_printk("dev %d:%d fc ineligible reasons:\n" - "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " - "num_commits:%ld, ineligible: %ld, numblks: %ld", - MAJOR(__entry->dev), MINOR(__entry->dev), - FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), - FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), - 
FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), - FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), - FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), - FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), - FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), - FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), - FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), - __entry->sbi->s_fc_stats.fc_num_commits, - __entry->sbi->s_fc_stats.fc_ineligible_commits, - __entry->sbi->s_fc_stats.fc_numblks) + TP_fast_assign( + int i; + __entry->dev = sb->s_dev; + for (i = 0; i < EXT4_FC_REASON_MAX; i++) { + __entry->fc_ineligible_rc[i] = + EXT4_SB(sb)->s_fc_stats.fc_ineligible_reason_count[i]; + } + __entry->fc_commits = EXT4_SB(sb)->s_fc_stats.fc_num_commits; + __entry->fc_ineligible_commits = + EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; + __entry->fc_numblks = EXT4_SB(sb)->s_fc_stats.fc_numblks; + ), + + TP_printk("dev %d,%d fc ineligible reasons:\n" + "%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u " + "num_commits:%lu, ineligible: %lu, numblks: %lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), + FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), + FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), + FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), + FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), + FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), + __entry->fc_commits, __entry->fc_ineligible_commits, + __entry->fc_numblks) ); #define DEFINE_TRACE_DENTRY_EVENT(__type) \ diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e70c90116edae..4a3ab0ed6e062 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -83,12 +83,15 @@ enum rxrpc_call_trace { rxrpc_call_error, rxrpc_call_got, rxrpc_call_got_kernel, + rxrpc_call_got_timer, rxrpc_call_got_userid, rxrpc_call_new_client, rxrpc_call_new_service, rxrpc_call_put, rxrpc_call_put_kernel, rxrpc_call_put_noqueue, + rxrpc_call_put_notimer, + rxrpc_call_put_timer, rxrpc_call_put_userid, rxrpc_call_queued, rxrpc_call_queued_ref, @@ -278,12 +281,15 @@ enum rxrpc_tx_point { EM(rxrpc_call_error, "*E*") \ EM(rxrpc_call_got, "GOT") \ EM(rxrpc_call_got_kernel, "Gke") \ + EM(rxrpc_call_got_timer, "GTM") \ EM(rxrpc_call_got_userid, "Gus") \ EM(rxrpc_call_new_client, "NWc") \ EM(rxrpc_call_new_service, "NWs") \ EM(rxrpc_call_put, "PUT") \ EM(rxrpc_call_put_kernel, "Pke") \ - EM(rxrpc_call_put_noqueue, "PNQ") \ + EM(rxrpc_call_put_noqueue, "PnQ") \ + EM(rxrpc_call_put_notimer, "PnT") \ + EM(rxrpc_call_put_timer, "PTM") \ EM(rxrpc_call_put_userid, "Pus") \ EM(rxrpc_call_queued, "QUE") \ EM(rxrpc_call_queued_ref, "QUR") \ diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 1c48b0ae3ba30..c780129ab7423 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -886,8 +886,11 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_pmadv_ksm 451 +__SYSCALL(__NR_pmadv_ksm, sys_pmadv_ksm) + #undef __NR_syscalls -#define __NR_syscalls 451 +#define __NR_syscalls 452 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b0383d371b9af..015bfec0dbfdb 100644 --- 
a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2286,8 +2286,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if current task belongs to the cgroup2. - * * 1, if current task does not belong to the cgroup2. + * * 1, if current task belongs to the cgroup2. + * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) @@ -2975,8 +2975,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack= * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description @@ -4279,8 +4279,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack= * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) * Description diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 20ee93f0f8761..96d52dd9c48ac 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -231,9 +231,42 @@ struct tcp_bbr_info { __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ }; +/* Phase as reported in netlink/ss stats. */ +enum tcp_bbr2_phase { + BBR2_PHASE_INVALID = 0, + BBR2_PHASE_STARTUP = 1, + BBR2_PHASE_DRAIN = 2, + BBR2_PHASE_PROBE_RTT = 3, + BBR2_PHASE_PROBE_BW_UP = 4, + BBR2_PHASE_PROBE_BW_DOWN = 5, + BBR2_PHASE_PROBE_BW_CRUISE = 6, + BBR2_PHASE_PROBE_BW_REFILL = 7 +}; + +struct tcp_bbr2_info { + /* u64 bw: bandwidth (app throughput) estimate in Byte per sec: */ + __u32 bbr_bw_lsb; /* lower 32 bits of bw */ + __u32 bbr_bw_msb; /* upper 32 bits of bw */ + __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ + __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ + __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ + __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */ + __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */ + __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */ + __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */ + __u8 bbr_mode; /* current bbr_mode in state machine */ + __u8 bbr_phase; /* current state machine phase */ + __u8 unused1; /* alignment padding; not used yet */ + __u8 bbr_version; /* MUST be at this offset in struct */ + __u32 bbr_inflight_lo; /* lower/short-term data volume bound */ + __u32 bbr_inflight_hi; /* higher/long-term data volume bound */ + __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */ +}; + union tcp_cc_info { struct tcpvegas_info vegas; struct tcp_dctcp_info dctcp; struct tcp_bbr_info bbr; + struct tcp_bbr2_info bbr2; }; #endif /* _UAPI_INET_DIAG_H_ */ diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 24a1c45bd1ae2..98e60801195e2 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -45,7 +45,7 @@ struct loop_info { unsigned long lo_inode; /* ioctl r/o */ __kernel_old_dev_t lo_rdevice; /* ioctl r/o */ int lo_offset; - int lo_encrypt_type; + int lo_encrypt_type; /* obsolete, ignored */ int lo_encrypt_key_size; /* 
ioctl w/o */
 	int		   lo_flags;
 	char		   lo_name[LO_NAME_SIZE];
@@ -61,7 +61,7 @@ struct loop_info64 {
 	__u64		   lo_offset;
 	__u64		   lo_sizelimit;/* bytes, 0 == max available */
 	__u32		   lo_number;			/* ioctl r/o */
-	__u32		   lo_encrypt_type;
+	__u32		   lo_encrypt_type;		/* obsolete, ignored */
 	__u32		   lo_encrypt_key_size;		/* ioctl w/o */
 	__u32		   lo_flags;
 	__u8		   lo_file_name[LO_NAME_SIZE];
diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h
index 87b55755f4ffe..d9db7ad438908 100644
--- a/include/uapi/linux/omap3isp.h
+++ b/include/uapi/linux/omap3isp.h
@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config {
 * struct omap3isp_stat_data - Statistic data sent to or received from user
 * @ts: Timestamp of returned framestats.
 * @buf: Pointer to pass to user.
+ * @buf_size: Size of buffer.
 * @frame_number: Frame number of requested stats.
 * @cur_frame: Current frame number being processed.
 * @config_counter: Number of the configuration associated with the data.
@@ -176,10 +177,12 @@ struct omap3isp_stat_data {
 	struct timeval ts;
 #endif
 	void __user *buf;
-	__u32 buf_size;
-	__u16 frame_number;
-	__u16 cur_frame;
-	__u16 config_counter;
+	__struct_group(/* no tag */, frame, /* no attrs */,
+		__u32 buf_size;
+		__u16 frame_number;
+		__u16 cur_frame;
+		__u16 config_counter;
+	);
 };
 
 #ifdef __KERNEL__
@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 {
 		__s32 tv_usec;
 	} ts;
 	__u32 buf;
-	__u32 buf_size;
-	__u16 frame_number;
-	__u16 cur_frame;
-	__u16 config_counter;
+	__struct_group(/* no tag */, frame, /* no attrs */,
+		__u32 buf_size;
+		__u16 frame_number;
+		__u16 cur_frame;
+		__u16 config_counter;
+	);
 };
 #endif
diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h
index 9b77cfc42efa3..283c5a7b3f2c8 100644
--- a/include/uapi/linux/rfkill.h
+++ b/include/uapi/linux/rfkill.h
@@ -159,8 +159,16 @@ struct rfkill_event_ext {
 * old behaviour for all userspace, unless it explicitly opts in to the
 * rules outlined here by using the new &struct rfkill_event_ext.
 *
- * Userspace using &struct rfkill_event_ext must adhere to the following
- * rules
+ * Additionally, some other userspace (bluez, g-s-d) was reading with a
+ * large size but as streaming reads rather than message-based, or with
+ * too strict checks for the returned size. So eventually, we completely
+ * reverted this, and extended messages need to be opted in to by using
+ * an ioctl:
+ *
+ *  ioctl(fd, RFKILL_IOCTL_MAX_SIZE, sizeof(struct rfkill_event_ext));
+ *
+ * Userspace using &struct rfkill_event_ext and the ioctl must adhere to
+ * the following rules:
 *
 * 1. accept short writes, optionally using them to detect that it's
 *    running on an older kernel;
@@ -175,6 +183,8 @@ struct rfkill_event_ext {
 #define RFKILL_IOC_MAGIC	'R'
 #define RFKILL_IOC_NOINPUT	1
 #define RFKILL_IOCTL_NOINPUT	_IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT)
+#define RFKILL_IOC_MAX_SIZE	2
+#define RFKILL_IOCTL_MAX_SIZE	_IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32)
 
 /* and that's all userspace gets */
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index 9a402fdb60e97..77ee207623a9b 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -105,23 +105,11 @@ struct rseq {
 	 * Read and set by the kernel. Set by user-space with single-copy
 	 * atomicity semantics. This field should only be updated by the
 	 * thread which registered this data structure. Aligned on 64-bit.
+	 *
+	 * 32-bit architectures should update the low order bits of the
+	 * rseq_cs field, leaving the high order bits initialized to 0.
*/ - union { - __u64 ptr64; -#ifdef __LP64__ - __u64 ptr; -#else - struct { -#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN) - __u32 padding; /* Initialized to zero. */ - __u32 ptr32; -#else /* LITTLE */ - __u32 ptr32; - __u32 padding; /* Initialized to zero. */ -#endif /* ENDIAN */ - } ptr; -#endif - } rseq_cs; + __u64 rseq_cs; /* * Restartable sequences flags field. diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index c4042dcfdc0c3..8885e69178bd7 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -68,6 +68,9 @@ /* NVIDIA Tegra Combined UART */ #define PORT_TEGRA_TCU 41 +/* ASPEED AST2x00 virtual UART */ +#define PORT_ASPEED_VUART 42 + /* Intel EG20 */ #define PORT_PCH_8LINE 44 #define PORT_PCH_2LINE 45 diff --git a/init/Kconfig b/init/Kconfig index e9119bf54b1f3..2fce0d599a91e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1231,6 +1231,22 @@ config USER_NS If unsure, say N. +config USER_NS_UNPRIVILEGED + bool "Allow unprivileged users to create namespaces" + default y + depends on USER_NS + help + When disabled, unprivileged users will not be able to create + new namespaces. Allowing users to create their own namespaces + has been part of several recent local privilege escalation + exploits, so if you need user namespaces but are + paranoid^Wsecurity-conscious you want to disable this. + + This setting can be overridden at runtime via the + kernel.unprivileged_userns_clone sysctl. + + If unsure, say Y. + config PID_NS bool "PID Namespaces" default y @@ -1370,7 +1386,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE config CC_OPTIMIZE_FOR_PERFORMANCE_O3 bool "Optimize more for performance (-O3)" - depends on ARC help Choosing this option will pass "-O3" to your compiler to optimize the kernel yet more for performance. 
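[Aside on the USER_NS_UNPRIVILEGED option added to init/Kconfig above: the gate can be probed from userspace before relying on user namespaces. The following is a minimal illustrative sketch, not part of this patch. It simply attempts unshare(CLONE_NEWUSER) and reports the result; on kernels carrying this change with kernel.unprivileged_userns_clone=0, the call fails with EPERM for tasks lacking CAP_SYS_ADMIN, per the copy_process() and ksys_unshare() hunks in kernel/fork.c further down.]

	#define _GNU_SOURCE
	#include <sched.h>
	#include <errno.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		if (unshare(CLONE_NEWUSER) == 0) {
			puts("unprivileged user namespaces: allowed");
			return 0;
		}
		/* EPERM here matches the unprivileged_userns_clone=0 path */
		printf("unshare(CLONE_NEWUSER) failed: %s\n", strerror(errno));
		return 1;
	}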
diff --git a/kernel/audit.h b/kernel/audit.h index c4498090a5bd6..58b66543b4d57 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -201,6 +201,10 @@ struct audit_context { struct { char *name; } module; + struct { + struct audit_ntp_data ntp_data; + struct timespec64 tk_injoffset; + } time; }; int fds[2]; struct audit_proctitle proctitle; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index a83928cbdcb7c..ea2ee1181921e 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1340,6 +1340,53 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) from_kuid(&init_user_ns, name->fcap.rootid)); } +static void audit_log_time(struct audit_context *context, struct audit_buffer **ab) +{ + const struct audit_ntp_data *ntp = &context->time.ntp_data; + const struct timespec64 *tk = &context->time.tk_injoffset; + static const char * const ntp_name[] = { + "offset", + "freq", + "status", + "tai", + "tick", + "adjust", + }; + int type; + + if (context->type == AUDIT_TIME_ADJNTPVAL) { + for (type = 0; type < AUDIT_NTP_NVALS; type++) { + if (ntp->vals[type].newval != ntp->vals[type].oldval) { + if (!*ab) { + *ab = audit_log_start(context, + GFP_KERNEL, + AUDIT_TIME_ADJNTPVAL); + if (!*ab) + return; + } + audit_log_format(*ab, "op=%s old=%lli new=%lli", + ntp_name[type], + ntp->vals[type].oldval, + ntp->vals[type].newval); + audit_log_end(*ab); + *ab = NULL; + } + } + } + if (tk->tv_sec != 0 || tk->tv_nsec != 0) { + if (!*ab) { + *ab = audit_log_start(context, GFP_KERNEL, + AUDIT_TIME_INJOFFSET); + if (!*ab) + return; + } + audit_log_format(*ab, "sec=%lli nsec=%li", + (long long)tk->tv_sec, tk->tv_nsec); + audit_log_end(*ab); + *ab = NULL; + } +} + static void show_special(struct audit_context *context, int *call_panic) { struct audit_buffer *ab; @@ -1454,6 +1501,11 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_format(ab, "(null)"); break; + case AUDIT_TIME_ADJNTPVAL: + case AUDIT_TIME_INJOFFSET: + /* this call deviates from the rest, eating the buffer */ + audit_log_time(context, &ab); + break; } audit_log_end(ab); } @@ -2849,31 +2901,26 @@ void __audit_fanotify(unsigned int response) void __audit_tk_injoffset(struct timespec64 offset) { - audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_INJOFFSET, - "sec=%lli nsec=%li", - (long long)offset.tv_sec, offset.tv_nsec); -} - -static void audit_log_ntp_val(const struct audit_ntp_data *ad, - const char *op, enum audit_ntp_type type) -{ - const struct audit_ntp_val *val = &ad->vals[type]; - - if (val->newval == val->oldval) - return; + struct audit_context *context = audit_context(); - audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_ADJNTPVAL, - "op=%s old=%lli new=%lli", op, val->oldval, val->newval); + /* only set type if not already set by NTP */ + if (!context->type) + context->type = AUDIT_TIME_INJOFFSET; + memcpy(&context->time.tk_injoffset, &offset, sizeof(offset)); } void __audit_ntp_log(const struct audit_ntp_data *ad) { - audit_log_ntp_val(ad, "offset", AUDIT_NTP_OFFSET); - audit_log_ntp_val(ad, "freq", AUDIT_NTP_FREQ); - audit_log_ntp_val(ad, "status", AUDIT_NTP_STATUS); - audit_log_ntp_val(ad, "tai", AUDIT_NTP_TAI); - audit_log_ntp_val(ad, "tick", AUDIT_NTP_TICK); - audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST); + struct audit_context *context = audit_context(); + int type; + + for (type = 0; type < AUDIT_NTP_NVALS; type++) + if (ad->vals[type].newval != ad->vals[type].oldval) { + /* unconditionally set type, overwriting TK */ + context->type = AUDIT_TIME_ADJNTPVAL; + 
memcpy(&context->time.ntp_data, ad, sizeof(*ad)); + break; + } } void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, diff --git a/kernel/bounds.c b/kernel/bounds.c index 9795d75b09b23..10dd9e6b03e55 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -22,6 +22,13 @@ int main(void) DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); +#ifdef CONFIG_LRU_GEN + DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1)); + DEFINE(LRU_REFS_WIDTH, MAX_NR_TIERS - 2); +#else + DEFINE(LRU_GEN_WIDTH, 0); + DEFINE(LRU_REFS_WIDTH, 0); +#endif /* End of constants */ return 0; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 3e23b3fa79ff6..ac89e65d1692e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -403,6 +403,9 @@ static struct btf_type btf_void; static int btf_resolve(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id); +static int btf_func_check(struct btf_verifier_env *env, + const struct btf_type *t); + static bool btf_type_is_modifier(const struct btf_type *t) { /* Some of them is not strictly a C modifier @@ -579,6 +582,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t) btf_type_is_struct(t) || btf_type_is_array(t) || btf_type_is_var(t) || + btf_type_is_func(t) || btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -3533,9 +3537,24 @@ static s32 btf_func_check_meta(struct btf_verifier_env *env, return 0; } +static int btf_func_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + const struct btf_type *t = v->t; + u32 next_type_id = t->type; + int err; + + err = btf_func_check(env, t); + if (err) + return err; + + env_stack_pop_resolved(env, next_type_id, 0); + return 0; +} + static struct btf_kind_operations func_ops = { .check_meta = btf_func_check_meta, - .resolve = btf_df_resolve, + .resolve = btf_func_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_ref_type_log, @@ -4156,7 +4175,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return !btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); - if (btf_type_is_decl_tag(t)) + if (btf_type_is_decl_tag(t) || btf_type_is_func(t)) return btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); @@ -4246,12 +4265,6 @@ static int btf_check_all_types(struct btf_verifier_env *env) if (err) return err; } - - if (btf_type_is_func(t)) { - err = btf_func_check(env, t); - if (err) - return err; - } } return 0; @@ -6201,12 +6214,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; } +enum { + BTF_MODULE_F_LIVE = (1 << 0), +}; + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES struct btf_module { struct list_head list; struct module *module; struct btf *btf; struct bin_attribute *sysfs_attr; + int flags; }; static LIST_HEAD(btf_modules); @@ -6234,7 +6252,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, int err = 0; if (mod->btf_data_size == 0 || - (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) + (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE && + op != MODULE_STATE_GOING)) goto out; switch (op) { @@ -6292,6 +6311,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, btf_mod->sysfs_attr = attr; } + break; + case MODULE_STATE_LIVE: + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + 
continue; + + btf_mod->flags |= BTF_MODULE_F_LIVE; + break; + } + mutex_unlock(&btf_module_mutex); break; case MODULE_STATE_GOING: mutex_lock(&btf_module_mutex); @@ -6339,7 +6369,12 @@ struct module *btf_try_get_module(const struct btf *btf) if (btf_mod->btf != btf) continue; - if (try_module_get(btf_mod->module)) + /* We must only consider module whose __init routine has + * finished, hence we must check for BTF_MODULE_F_LIVE flag, + * which is set from the notifier callback for + * MODULE_STATE_LIVE. + */ + if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module)) res = btf_mod->module; break; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 22c8ae94e4c1c..2823dcefae10e 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -166,7 +166,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, } static struct perf_callchain_entry * -get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) +get_callchain_entry_for_task(struct task_struct *task, u32 max_depth) { #ifdef CONFIG_STACKTRACE struct perf_callchain_entry *entry; @@ -177,9 +177,8 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) if (!entry) return NULL; - entry->nr = init_nr + - stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr), - sysctl_perf_event_max_stack - init_nr, 0); + entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip, + max_depth, 0); /* stack_trace_save_tsk() works on unsigned long array, while * perf_callchain_entry uses u64 array. For 32-bit systems, it is @@ -191,7 +190,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) int i; /* copy data from the end to avoid using extra buffer */ - for (i = entry->nr - 1; i >= (int)init_nr; i--) + for (i = entry->nr - 1; i >= 0; i--) to[i] = (u64)(from[i]); } @@ -208,27 +207,19 @@ static long __bpf_get_stackid(struct bpf_map *map, { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *bucket, *new_bucket, *old_bucket; - u32 max_depth = map->value_size / stack_map_data_size(map); - /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ - u32 init_nr = sysctl_perf_event_max_stack - max_depth; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; u32 hash, id, trace_nr, trace_len; bool user = flags & BPF_F_USER_STACK; u64 *ips; bool hash_matches; - /* get_perf_callchain() guarantees that trace->nr >= init_nr - * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth - */ - trace_nr = trace->nr - init_nr; - - if (trace_nr <= skip) + if (trace->nr <= skip) /* skipping more than usable stack trace */ return -EFAULT; - trace_nr -= skip; + trace_nr = trace->nr - skip; trace_len = trace_nr * sizeof(u64); - ips = trace->ip + skip + init_nr; + ips = trace->ip + skip; hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); id = hash & (smap->n_buckets - 1); bucket = READ_ONCE(smap->buckets[id]); @@ -285,8 +276,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, u64, flags) { u32 max_depth = map->value_size / stack_map_data_size(map); - /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ - u32 init_nr = sysctl_perf_event_max_stack - max_depth; + u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; struct perf_callchain_entry *trace; bool kernel = !user; @@ -295,8 +285,12 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) 
return -EINVAL; - trace = get_perf_callchain(regs, init_nr, kernel, user, - sysctl_perf_event_max_stack, false, false); + max_depth += skip; + if (max_depth > sysctl_perf_event_max_stack) + max_depth = sysctl_perf_event_max_stack; + + trace = get_perf_callchain(regs, 0, kernel, user, max_depth, + false, false); if (unlikely(!trace)) /* couldn't fetch the stack trace */ @@ -387,7 +381,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, struct perf_callchain_entry *trace_in, void *buf, u32 size, u64 flags) { - u32 init_nr, trace_nr, copy_len, elem_size, num_elem; + u32 trace_nr, copy_len, elem_size, num_elem, max_depth; bool user_build_id = flags & BPF_F_USER_BUILD_ID; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; @@ -412,30 +406,28 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, goto err_fault; num_elem = size / elem_size; - if (sysctl_perf_event_max_stack < num_elem) - init_nr = 0; - else - init_nr = sysctl_perf_event_max_stack - num_elem; + max_depth = num_elem + skip; + if (sysctl_perf_event_max_stack < max_depth) + max_depth = sysctl_perf_event_max_stack; if (trace_in) trace = trace_in; else if (kernel && task) - trace = get_callchain_entry_for_task(task, init_nr); + trace = get_callchain_entry_for_task(task, max_depth); else - trace = get_perf_callchain(regs, init_nr, kernel, user, - sysctl_perf_event_max_stack, + trace = get_perf_callchain(regs, 0, kernel, user, max_depth, false, false); if (unlikely(!trace)) goto err_fault; - trace_nr = trace->nr - init_nr; - if (trace_nr < skip) + if (trace->nr < skip) goto err_fault; - trace_nr -= skip; + trace_nr = trace->nr - skip; trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; copy_len = trace_nr * elem_size; - ips = trace->ip + skip + init_nr; + + ips = trace->ip + skip; if (user && user_build_id) stack_map_get_build_id_offset(buf, ips, trace_nr, user); else diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 6e36e854b5124..929ed3bf1a7cf 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -165,7 +165,6 @@ struct cgroup_mgctx { #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index df2bface866ef..85cb51c4a17e6 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -291,7 +291,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size) */ int kdb_putarea_size(unsigned long addr, void *res, size_t size) { - int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size); + int ret = copy_to_kernel_nofault((char *)addr, (char *)res, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { kdb_func_printf("Bad address 0x%lx\n", addr); diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 7a14ca29c3778..f8ff598596b85 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -927,7 +927,7 @@ static __init int dma_debug_cmdline(char *str) global_disable = true; } - return 0; + return 1; } static __init int dma_debug_entries_cmdline(char *str) @@ -936,7 +936,7 @@ static __init int dma_debug_entries_cmdline(char *str) return -EINVAL; if (!get_option(&str, &nr_prealloc_entries)) nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - return 0; + return 1; } __setup("dma_debug=", 
dma_debug_cmdline);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 6db1c475ec827..6c350555e5a1c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -701,13 +701,10 @@ void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
 void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
 		size_t size, enum dma_data_direction dir)
 {
-	/*
-	 * Unconditional bounce is necessary to avoid corruption on
-	 * sync_*_for_cpu or dma_ummap_* when the device didn't overwrite
-	 * the whole lengt of the bounce buffer.
-	 */
-	swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
-	BUG_ON(!valid_dma_direction(dir));
+	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+		swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
+	else
+		BUG_ON(dir != DMA_FROM_DEVICE);
 }
 
 void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6859229497b15..69cf71d973121 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10574,8 +10574,11 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
 			}
 
 			/* ready to consume more filters */
+			kfree(filename);
+			filename = NULL;
 			state = IF_STATE_ACTION;
 			filter = NULL;
+			kernel = 0;
 		}
 	}
diff --git a/kernel/exit.c b/kernel/exit.c
index b00a25bb4ab93..54d2ce4b93d1e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -463,6 +463,7 @@ void mm_update_next_owner(struct mm_struct *mm)
 		goto retry;
 	}
 	WRITE_ONCE(mm->owner, c);
+	lru_gen_migrate_mm(mm);
 	task_unlock(c);
 	put_task_struct(c);
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index f1e89007f2288..463b2d2bf5d66 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -98,6 +98,10 @@
 #include <linux/io_uring.h>
 #include <linux/bpf.h>
 
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
+
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1079,6 +1083,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 		goto fail_nocontext;
 
 	mm->user_ns = get_user_ns(user_ns);
+	lru_gen_init_mm(mm);
 	return mm;
 
 fail_nocontext:
@@ -1121,6 +1126,7 @@ static inline void __mmput(struct mm_struct *mm)
 	}
 	if (mm->binfmt)
 		module_put(mm->binfmt->module);
+	lru_gen_del_mm(mm);
 	mmdrop(mm);
 }
 
@@ -1922,6 +1928,10 @@ static __latent_entropy struct task_struct *copy_process(
 	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
 		return ERR_PTR(-EINVAL);
 
+	if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+		if (!capable(CAP_SYS_ADMIN))
+			return ERR_PTR(-EPERM);
+
 	/*
 	 * Thread groups must share signals as well, and detached threads
 	 * can only be started up within the thread group.
@@ -2586,6 +2596,13 @@ pid_t kernel_clone(struct kernel_clone_args *args) get_task_struct(p); } + if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) { + /* lock the task to synchronize with memcg migration */ + task_lock(p); + lru_gen_add_mm(p->mm); + task_unlock(p); + } + wake_up_new_task(p); /* forking complete and child started to run, tell ptracer */ @@ -3036,6 +3053,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; + if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto bad_unshare_out; + } + err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 2267e6527db3c..3e129f2e19d15 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -681,6 +681,27 @@ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) } EXPORT_SYMBOL_GPL(generic_handle_domain_irq); +/** + * generic_dispatch_irq - Dispatch an interrupt from an interrupt handler + * @irq: The irq number to handle + * + * A wrapper around generic_handle_irq() which ensures that interrupts are + * disabled when the primary handler of the dispatched irq is invoked. + * This is useful for interrupt handlers with dispatching to be safe for + * the forced threaded case. + */ +int generic_dispatch_irq(unsigned int irq) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = generic_handle_irq(irq); + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(generic_dispatch_irq); + /** * generic_handle_domain_nmi - Invoke the handler for a HW nmi belonging * to a domain. diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 585494ec464f9..bc475e62279d2 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -190,7 +190,7 @@ static int klp_find_object_symbol(const char *objname, const char *name, return -EINVAL; } -static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, +static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, unsigned int symndx, Elf_Shdr *relasec, const char *sec_objname) { @@ -218,7 +218,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, relas = (Elf_Rela *) relasec->sh_addr; /* For each rela in this klp relocation section */ for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) { - sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); + sym = (Elf_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); if (sym->st_shndx != SHN_LIVEPATCH) { pr_err("symbol %s is not marked as a livepatch symbol\n", strtab + sym->st_name); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f8a0212189cad..4675a686f942f 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -183,11 +183,9 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES); static struct hlist_head lock_keys_hash[KEYHASH_SIZE]; unsigned long nr_lock_classes; unsigned long nr_zapped_classes; -#ifndef CONFIG_DEBUG_LOCKDEP -static -#endif +unsigned long max_lock_class_idx; struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; -static DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS); +DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS); static inline struct lock_class *hlock_class(struct held_lock *hlock) { @@ -338,7 +336,7 @@ static inline void lock_release_holdtime(struct held_lock *hlock) * elements. 
These elements are linked together by the lock_entry member in * struct lock_class. */ -LIST_HEAD(all_lock_classes); +static LIST_HEAD(all_lock_classes); static LIST_HEAD(free_lock_classes); /** @@ -1252,6 +1250,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) struct lockdep_subclass_key *key; struct hlist_head *hash_head; struct lock_class *class; + int idx; DEBUG_LOCKS_WARN_ON(!irqs_disabled()); @@ -1317,6 +1316,9 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) * of classes. */ list_move_tail(&class->lock_entry, &all_lock_classes); + idx = class - lock_classes; + if (idx > max_lock_class_idx) + max_lock_class_idx = idx; if (verbose(class)) { graph_unlock(); @@ -6000,6 +6002,8 @@ static void zap_class(struct pending_free *pf, struct lock_class *class) WRITE_ONCE(class->name, NULL); nr_lock_classes--; __clear_bit(class - lock_classes, lock_classes_in_use); + if (class - lock_classes == max_lock_class_idx) + max_lock_class_idx--; } else { WARN_ONCE(true, "%s() failed for class %s\n", __func__, class->name); @@ -6290,7 +6294,13 @@ void lockdep_reset_lock(struct lockdep_map *lock) lockdep_reset_lock_reg(lock); } -/* Unregister a dynamically allocated key. */ +/* + * Unregister a dynamically allocated key. + * + * Unlike lockdep_register_key(), a search is always done to find a matching + * key irrespective of debug_locks to avoid potential invalid access to freed + * memory in lock_class entry. + */ void lockdep_unregister_key(struct lock_class_key *key) { struct hlist_head *hash_head = keyhashentry(key); @@ -6305,10 +6315,8 @@ void lockdep_unregister_key(struct lock_class_key *key) return; raw_local_irq_save(flags); - if (!graph_lock()) - goto out_irq; + lockdep_lock(); - pf = get_pending_free(); hlist_for_each_entry_rcu(k, hash_head, hash_entry) { if (k == key) { hlist_del_rcu(&k->hash_entry); @@ -6316,11 +6324,13 @@ void lockdep_unregister_key(struct lock_class_key *key) break; } } - WARN_ON_ONCE(!found); - __lockdep_free_key_range(pf, key, 1); - call_rcu_zapped(pf); - graph_unlock(); -out_irq: + WARN_ON_ONCE(!found && debug_locks); + if (found) { + pf = get_pending_free(); + __lockdep_free_key_range(pf, key, 1); + call_rcu_zapped(pf); + } + lockdep_unlock(); raw_local_irq_restore(flags); /* Wait until is_dynamic_key() has finished accessing k->hash_entry. 
*/ diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index ecb8662e7a4ed..bbe9000260d02 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -121,7 +121,6 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = #define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) -extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; #define LOCK_USAGE_CHARS (2*XXX_LOCK_USAGE_STATES + 1) @@ -151,6 +150,10 @@ extern unsigned int nr_large_chain_blocks; extern unsigned int max_lockdep_depth; extern unsigned int max_bfs_queue_depth; +extern unsigned long max_lock_class_idx; + +extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; +extern unsigned long lock_classes_in_use[]; #ifdef CONFIG_PROVE_LOCKING extern unsigned long lockdep_count_forward_deps(struct lock_class *); @@ -205,7 +208,6 @@ struct lockdep_stats { }; DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); -extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; #define __debug_atomic_inc(ptr) \ this_cpu_inc(lockdep_stats.ptr); diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index b8d9a050c337a..15fdc7fa5c688 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -24,14 +24,33 @@ #include "lockdep_internals.h" +/* + * Since iteration of lock_classes is done without holding the lockdep lock, + * it is not safe to iterate all_lock_classes list directly as the iteration + * may branch off to free_lock_classes or the zapped list. Iteration is done + * directly on the lock_classes array by checking the lock_classes_in_use + * bitmap and max_lock_class_idx. + */ +#define iterate_lock_classes(idx, class) \ + for (idx = 0, class = lock_classes; idx <= max_lock_class_idx; \ + idx++, class++) + static void *l_next(struct seq_file *m, void *v, loff_t *pos) { - return seq_list_next(v, &all_lock_classes, pos); + struct lock_class *class = v; + + ++class; + *pos = class - lock_classes; + return (*pos > max_lock_class_idx) ? 
NULL : class; } static void *l_start(struct seq_file *m, loff_t *pos) { - return seq_list_start_head(&all_lock_classes, *pos); + unsigned long idx = *pos; + + if (idx > max_lock_class_idx) + return NULL; + return lock_classes + idx; } static void l_stop(struct seq_file *m, void *v) @@ -57,14 +76,16 @@ static void print_name(struct seq_file *m, struct lock_class *class) static int l_show(struct seq_file *m, void *v) { - struct lock_class *class = list_entry(v, struct lock_class, lock_entry); + struct lock_class *class = v; struct lock_list *entry; char usage[LOCK_USAGE_CHARS]; + int idx = class - lock_classes; - if (v == &all_lock_classes) { + if (v == lock_classes) seq_printf(m, "all lock classes:\n"); + + if (!test_bit(idx, lock_classes_in_use)) return 0; - } seq_printf(m, "%p", class->key); #ifdef CONFIG_DEBUG_LOCKDEP @@ -220,8 +241,11 @@ static int lockdep_stats_show(struct seq_file *m, void *v) #ifdef CONFIG_PROVE_LOCKING struct lock_class *class; + unsigned long idx; - list_for_each_entry(class, &all_lock_classes, lock_entry) { + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue; if (class->usage_mask == 0) nr_unused++; @@ -254,6 +278,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v) sum_forward_deps += lockdep_count_forward_deps(class); } + #ifdef CONFIG_DEBUG_LOCKDEP DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused); #endif @@ -345,6 +370,8 @@ static int lockdep_stats_show(struct seq_file *m, void *v) seq_printf(m, " max bfs queue depth: %11u\n", max_bfs_queue_depth); #endif + seq_printf(m, " max lock class index: %11lu\n", + max_lock_class_idx); lockdep_stats_debug_show(m); seq_printf(m, " debug_locks: %11u\n", debug_locks); @@ -622,12 +649,16 @@ static int lock_stat_open(struct inode *inode, struct file *file) if (!res) { struct lock_stat_data *iter = data->stats; struct seq_file *m = file->private_data; + unsigned long idx; - list_for_each_entry(class, &all_lock_classes, lock_entry) { + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue; iter->class = class; iter->stats = lock_stats(class); iter++; } + data->iter_end = iter; sort(data->stats, data->iter_end - data->stats, @@ -645,6 +676,7 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct lock_class *class; + unsigned long idx; char c; if (count) { @@ -654,8 +686,11 @@ static ssize_t lock_stat_write(struct file *file, const char __user *buf, if (c != '0') return count; - list_for_each_entry(class, &all_lock_classes, lock_entry) + iterate_lock_classes(idx, class) { + if (!test_bit(idx, lock_classes_in_use)) + continue; clear_lock_stats(class); + } } return count; } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e6af502c2fd77..08780a466fdf7 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1328,7 +1328,7 @@ static int __init resumedelay_setup(char *str) int rc = kstrtouint(str, 0, &resume_delay); if (rc) - return rc; + pr_warn("resumedelay: bad option string '%s'\n", str); return 1; } diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c index d20526c5be15b..b663a97f5867a 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c @@ -157,22 +157,22 @@ static int __init setup_test_suspend(char *value) value++; suspend_type = strsep(&value, ","); if (!suspend_type) - return 0; + return 1; repeat = strsep(&value, ","); if (repeat) { if (kstrtou32(repeat, 0, &test_repeat_count_max)) - 
return 0;
+			return 1;
 	}
 
 	for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
 		if (!strcmp(pm_labels[i], suspend_type)) {
 			test_state_label = pm_labels[i];
-			return 0;
+			return 1;
 		}
 
 	printk(warn_bad_state, suspend_type);
-	return 0;
+	return 1;
 }
 
 __setup("test_suspend", setup_test_suspend);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 82abfaf3c2aad..833e407545b82 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -146,8 +146,10 @@ static int __control_devkmsg(char *str)
 
 static int __init control_devkmsg(char *str)
 {
-	if (__control_devkmsg(str) < 0)
+	if (__control_devkmsg(str) < 0) {
+		pr_warn("printk.devkmsg: bad option string '%s'\n", str);
 		return 1;
+	}
 
 	/*
 	 * Set sysctl string accordingly:
@@ -166,7 +168,7 @@ static int __init control_devkmsg(char *str)
 	 */
 	devkmsg_log |= DEVKMSG_LOG_MASK_LOCK;
 
-	return 0;
+	return 1;
 }
 __setup("printk.devkmsg=", control_devkmsg);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index eea265082e975..ccc4b465775b8 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -371,6 +371,26 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 	return !err;
 }
 
+static int check_ptrace_options(unsigned long data)
+{
+	if (data & ~(unsigned long)PTRACE_O_MASK)
+		return -EINVAL;
+
+	if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
+		if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
+		    !IS_ENABLED(CONFIG_SECCOMP))
+			return -EINVAL;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED ||
+		    current->ptrace & PT_SUSPEND_SECCOMP)
+			return -EPERM;
+	}
+	return 0;
+}
+
 static int ptrace_attach(struct task_struct *task, long request,
 			 unsigned long addr,
 			 unsigned long flags)
@@ -382,8 +402,16 @@ static int ptrace_attach(struct task_struct *task, long request,
 	if (seize) {
 		if (addr != 0)
 			goto out;
+		/*
+		 * This duplicates the check in check_ptrace_options() because
+		 * ptrace_attach() and ptrace_setoptions() have historically
+		 * used different error codes for unknown ptrace options.
+		 */
 		if (flags & ~(unsigned long)PTRACE_O_MASK)
 			goto out;
+		retval = check_ptrace_options(flags);
+		if (retval)
+			return retval;
 		flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT);
 	} else {
 		flags = PT_PTRACED;
@@ -654,22 +682,11 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
 static int ptrace_setoptions(struct task_struct *child, unsigned long data)
 {
 	unsigned flags;
+	int ret;
 
-	if (data & ~(unsigned long)PTRACE_O_MASK)
-		return -EINVAL;
-
-	if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
-		if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
-		    !IS_ENABLED(CONFIG_SECCOMP))
-			return -EINVAL;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED ||
-		    current->ptrace & PT_SUSPEND_SECCOMP)
-			return -EPERM;
-	}
+	ret = check_ptrace_options(data);
+	if (ret)
+		return ret;
 
 	/* Avoid intermediate state when all opts are cleared */
 	flags = child->ptrace;
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index e373fbe44da5e..431cee212467d 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -56,13 +56,13 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
 static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp,
 					   int flags)
 {
-	rsclp->flags |= flags;
+	WRITE_ONCE(rsclp->flags, rsclp->flags | flags);
 }
 
 static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp,
 					     int flags)
 {
-	rsclp->flags &= ~flags;
+	WRITE_ONCE(rsclp->flags, rsclp->flags & ~flags);
 }
 
 static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp,
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a4c25a6283b0b..5dae0da879ae2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -91,7 +91,7 @@ static struct rcu_state rcu_state = {
 	.abbr = RCU_ABBR,
 	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
 	.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
-	.ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock),
+	.ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
 };
 
 /* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -1086,9 +1086,8 @@ void rcu_irq_enter_irqson(void)
 * Just check whether or not this CPU has non-offloaded RCU callbacks
 * queued.
 */
-int rcu_needs_cpu(u64 basemono, u64 *nextevt)
+int rcu_needs_cpu(void)
 {
-	*nextevt = KTIME_MAX;
 	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
 		!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
 }
@@ -1175,7 +1174,15 @@ bool rcu_lockdep_current_cpu_online(void)
 	preempt_disable_notrace();
 	rdp = this_cpu_ptr(&rcu_data);
 	rnp = rdp->mynode;
-	if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
+	/*
+	 * Strictly, we care here about the case where the current CPU is
+	 * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
+	 * not being up to date. So arch_spin_is_locked() might have a
+	 * false positive if it's held by some *other* CPU, but that's
+	 * OK because that just means a false *negative* on the warning.
+	 */
+	if (rdp->grpmask & rcu_rnp_online_cpus(rnp) ||
+	    arch_spin_is_locked(&rcu_state.ofl_lock))
 		ret = true;
 	preempt_enable_notrace();
 	return ret;
@@ -1739,7 +1746,6 @@ static void rcu_strict_gp_boundary(void *unused)
 */
 static noinline_for_stack bool rcu_gp_init(void)
 {
-	unsigned long firstseq;
 	unsigned long flags;
 	unsigned long oldmask;
 	unsigned long mask;
@@ -1782,22 +1788,17 @@ static noinline_for_stack bool rcu_gp_init(void)
 	 * of RCU's Requirements documentation.
*/ WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF); + /* Exclude CPU hotplug operations. */ rcu_for_each_leaf_node(rnp) { - // Wait for CPU-hotplug operations that might have - // started before this grace period did. - smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values. - firstseq = READ_ONCE(rnp->ofl_seq); - if (firstseq & 0x1) - while (firstseq == READ_ONCE(rnp->ofl_seq)) - schedule_timeout_idle(1); // Can't wake unless RCU is watching. - smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values. - raw_spin_lock(&rcu_state.ofl_lock); - raw_spin_lock_irq_rcu_node(rnp); + local_irq_save(flags); + arch_spin_lock(&rcu_state.ofl_lock); + raw_spin_lock_rcu_node(rnp); if (rnp->qsmaskinit == rnp->qsmaskinitnext && !rnp->wait_blkd_tasks) { /* Nothing to do on this leaf rcu_node structure. */ - raw_spin_unlock_irq_rcu_node(rnp); - raw_spin_unlock(&rcu_state.ofl_lock); + raw_spin_unlock_rcu_node(rnp); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); continue; } @@ -1832,8 +1833,9 @@ static noinline_for_stack bool rcu_gp_init(void) rcu_cleanup_dead_rnp(rnp); } - raw_spin_unlock_irq_rcu_node(rnp); - raw_spin_unlock(&rcu_state.ofl_lock); + raw_spin_unlock_rcu_node(rnp); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); } rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */ @@ -4287,11 +4289,10 @@ void rcu_cpu_starting(unsigned int cpu) rnp = rdp->mynode; mask = rdp->grpmask; - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); + local_irq_save(flags); + arch_spin_lock(&rcu_state.ofl_lock); rcu_dynticks_eqs_online(); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - raw_spin_lock_irqsave_rcu_node(rnp, flags); + raw_spin_lock_rcu_node(rnp); WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); newcpu = !(rnp->expmaskinitnext & mask); rnp->expmaskinitnext |= mask; @@ -4304,15 +4305,18 @@ void rcu_cpu_starting(unsigned int cpu) /* An incoming CPU should never be blocking a grace period. */ if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */ + /* rcu_report_qs_rnp() *really* wants some flags to restore */ + unsigned long flags2; + + local_irq_save(flags2); rcu_disable_urgency_upon_qs(rdp); /* Report QS -after- changing ->qsmaskinitnext! */ - rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); + rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags2); } else { - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + raw_spin_unlock_rcu_node(rnp); } - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(rnp->ofl_seq & 0x1); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ } @@ -4326,7 +4330,7 @@ void rcu_cpu_starting(unsigned int cpu) */ void rcu_report_dead(unsigned int cpu) { - unsigned long flags; + unsigned long flags, seq_flags; unsigned long mask; struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ @@ -4340,10 +4344,8 @@ void rcu_report_dead(unsigned int cpu) /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ mask = rdp->grpmask; - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). 
- raw_spin_lock(&rcu_state.ofl_lock); + local_irq_save(seq_flags); + arch_spin_lock(&rcu_state.ofl_lock); raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq); rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags); @@ -4354,10 +4356,8 @@ void rcu_report_dead(unsigned int cpu) } WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - raw_spin_unlock(&rcu_state.ofl_lock); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(rnp->ofl_seq & 0x1); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(seq_flags); rdp->cpu_started = false; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 486fc901bd085..4b4bcef8a9743 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -56,8 +56,6 @@ struct rcu_node { /* Initialized from ->qsmaskinitnext at the */ /* beginning of each grace period. */ unsigned long qsmaskinitnext; - unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */ - /* Online CPUs for next grace period. */ unsigned long expmask; /* CPUs or groups that need to check in */ /* to allow the current expedited GP */ /* to complete. */ @@ -355,7 +353,7 @@ struct rcu_state { const char *name; /* Name of structure. */ char abbr; /* Abbreviated name. */ - raw_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp; + arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp; /* Synchronize offline with */ /* GP pre-initialization. */ }; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c5b45c2f68a15..5678bee7aefee 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -556,16 +556,16 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } - /* Unboost if we were boosted. */ - if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) - rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex); - /* * If this was the last task on the expedited lists, * then we need to report up the rcu_node hierarchy. */ if (!empty_exp && empty_exp_now) rcu_report_exp_rnp(rnp, true); + + /* Unboost if we were boosted. */ + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) + rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex); } else { local_irq_restore(flags); } diff --git a/kernel/resource.c b/kernel/resource.c index 9c08d6e9eef27..34eaee179689a 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -56,14 +56,6 @@ struct resource_constraint { static DEFINE_RWLOCK(resource_lock); -/* - * For memory hotplug, there is no way to free resource entries allocated - * by boot mem after the system is up. So for reusing the resource entry - * we need to remember the resource. 
- */ -static struct resource *bootmem_resource_free; -static DEFINE_SPINLOCK(bootmem_resource_lock); - static struct resource *next_resource(struct resource *p) { if (p->child) @@ -160,36 +152,19 @@ __initcall(ioresources_init); static void free_resource(struct resource *res) { - if (!res) - return; - - if (!PageSlab(virt_to_head_page(res))) { - spin_lock(&bootmem_resource_lock); - res->sibling = bootmem_resource_free; - bootmem_resource_free = res; - spin_unlock(&bootmem_resource_lock); - } else { + /** + * If the resource was allocated using memblock early during boot + * we'll leak it here: we can only return full pages back to the + * buddy and trying to be smart and reusing them eventually in + * alloc_resource() overcomplicates resource handling. + */ + if (res && PageSlab(virt_to_head_page(res))) kfree(res); - } } static struct resource *alloc_resource(gfp_t flags) { - struct resource *res = NULL; - - spin_lock(&bootmem_resource_lock); - if (bootmem_resource_free) { - res = bootmem_resource_free; - bootmem_resource_free = res->sibling; - } - spin_unlock(&bootmem_resource_lock); - - if (res) - memset(res, 0, sizeof(struct resource)); - else - res = kzalloc(sizeof(struct resource), flags); - - return res; + return kzalloc(sizeof(struct resource), flags); } /* Return the conflict entry if you can't request it */ diff --git a/kernel/rseq.c b/kernel/rseq.c index 6d45ac3dae7fb..97ac20b4f7387 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -128,10 +128,10 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) int ret; #ifdef CONFIG_64BIT - if (get_user(ptr, &t->rseq->rseq_cs.ptr64)) + if (get_user(ptr, &t->rseq->rseq_cs)) return -EFAULT; #else - if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr))) + if (copy_from_user(&ptr, &t->rseq->rseq_cs, sizeof(ptr))) return -EFAULT; #endif if (!ptr) { @@ -217,9 +217,9 @@ static int clear_rseq_cs(struct task_struct *t) * Set rseq_cs to NULL. */ #ifdef CONFIG_64BIT - return put_user(0UL, &t->rseq->rseq_cs.ptr64); + return put_user(0UL, &t->rseq->rseq_cs); #else - if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64))) + if (clear_user(&t->rseq->rseq_cs, sizeof(t->rseq->rseq_cs))) return -EFAULT; return 0; #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9745613d531ce..04bfe304bbcf7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -36,6 +36,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp); +EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_thermal_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_cpu_capacity_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp); @@ -4979,6 +4980,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * finish_task_switch()'s mmdrop(). */ switch_mm_irqs_off(prev->active_mm, next->mm, next); + lru_gen_use_mm(next->mm); if (!prev->mm) { // from kernel /* will mmdrop() in finish_task_switch(). 
*/ diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index 3d06c5e4220d4..307800586ac81 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -334,12 +334,13 @@ static struct cftype files[] = { */ void cpuacct_charge(struct task_struct *tsk, u64 cputime) { + unsigned int cpu = task_cpu(tsk); struct cpuacct *ca; rcu_read_lock(); for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) - __this_cpu_add(*ca->cpuusage, cputime); + *per_cpu_ptr(ca->cpuusage, cpu) += cputime; rcu_read_unlock(); } diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 26778884d9ab1..6d65ab6e484e2 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -289,6 +289,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) * into the same scale so we can compare. */ boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; + boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); if (sg_cpu->util < boost) sg_cpu->util = boost; } @@ -348,8 +349,11 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, /* * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. + * + * Except when the rq is capped by uclamp_max. */ - if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) { + if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) && + sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) { next_f = sg_policy->next_freq; /* Restore cached freq as next_freq has changed */ @@ -395,8 +399,11 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, /* * Do not reduce the target performance level if the CPU has not been * idle recently, as the reduction is likely to be premature then. + * + * Except when the rq is capped by uclamp_max. 
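A worked example of the new boost clamp (standalone model, made-up numbers; uclamp and the boost share the 0..1024 capacity scale): if the rq's aggregated uclamp_max is 256, an iowait boost of 512 can no longer drag the frequency above that ceiling.

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	return v < lo ? lo : v > hi ? hi : v;
}

int main(void)
{
	unsigned long iowait_boost = 512;	/* sg_cpu->iowait_boost */
	unsigned long max = 1024;		/* sg_cpu->max, CPU capacity */
	unsigned long rq_min = 0, rq_max = 256;	/* aggregated rq uclamp */

	unsigned long boost = (iowait_boost * max) >> SCHED_CAPACITY_SHIFT;

	/* what uclamp_rq_util_with(rq, boost, NULL) would hand back */
	boost = clamp_ul(boost, rq_min, rq_max);
	printf("effective boost: %lu\n", boost);	/* 256, not 512 */
	return 0;
}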
*/ - if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util) + if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) && + sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util) sg_cpu->util = prev_util; cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index d2c072b0ef01f..62f0cf8422775 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2240,12 +2240,6 @@ static int push_dl_task(struct rq *rq) return 0; retry: - if (is_migration_disabled(next_task)) - return 0; - - if (WARN_ON(next_task == rq->curr)) - return 0; - /* * If next_task preempts rq->curr, and rq->curr * can move away, it makes sense to just reschedule @@ -2258,6 +2252,12 @@ static int push_dl_task(struct rq *rq) return 0; } + if (is_migration_disabled(next_task)) + return 0; + + if (WARN_ON(next_task == rq->curr)) + return 0; + /* We might release rq lock */ get_task_struct(next_task); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index aa29211de1bf8..102d6f70e84d3 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -931,25 +931,15 @@ void print_numa_stats(struct seq_file *m, int node, unsigned long tsf, static void sched_show_numa(struct task_struct *p, struct seq_file *m) { #ifdef CONFIG_NUMA_BALANCING - struct mempolicy *pol; - if (p->mm) P(mm->numa_scan_seq); - task_lock(p); - pol = p->mempolicy; - if (pol && !(pol->flags & MPOL_F_MORON)) - pol = NULL; - mpol_get(pol); - task_unlock(p); - P(numa_pages_migrated); P(numa_preferred_nid); P(total_numa_faults); SEQ_printf(m, "current_node=%d, numa_group_id=%d\n", task_node(p), task_numa_group_id(p)); show_numa_stats(p, m); - mpol_put(pol); #endif } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5146163bfabb9..cddcf2f4f5251 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9040,9 +9040,10 @@ static bool update_pick_idlest(struct sched_group *idlest, * This is an approximation as the number of running tasks may not be * related to the number of busy CPUs due to sched_setaffinity. */ -static inline bool allow_numa_imbalance(int dst_running, int dst_weight) +static inline bool +allow_numa_imbalance(unsigned int running, unsigned int weight) { - return (dst_running < (dst_weight >> 2)); + return (running < (weight >> 2)); } /* @@ -9176,12 +9177,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) return idlest; #endif /* - * Otherwise, keep the task on this node to stay close - * its wakeup source and improve locality. If there is - * a real need of migration, periodic load balance will - * take care of it. + * Otherwise, keep the task close to the wakeup source + * and improve locality if the number of running tasks + * would remain below threshold where an imbalance is + * allowed. If there is a real need of migration, + * periodic load balance will take care of it. 
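To make the threshold concrete, a standalone model with made-up numbers: counting the waking task itself via sum_nr_running + 1, a local group of 16 CPUs keeps wakeups local only while the resulting running count stays under a quarter of the group weight.

#include <stdio.h>

/* mirrors the patched helper */
static int allow_numa_imbalance(unsigned int running, unsigned int weight)
{
	return running < (weight >> 2);
}

int main(void)
{
	unsigned int group_weight = 16;		/* CPUs in the local group */
	unsigned int nr;

	for (nr = 0; nr < 6; nr++)
		printf("sum_nr_running=%u -> %s\n", nr,
		       allow_numa_imbalance(nr + 1, group_weight) ?
		       "keep local" : "spread");
	/* nr = 0..2 stay local (1..3 < 4); nr >= 3 spreads */
	return 0;
}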
*/ - if (allow_numa_imbalance(local_sgs.sum_nr_running, sd->span_weight)) + if (allow_numa_imbalance(local_sgs.sum_nr_running + 1, local_sgs.group_weight)) return NULL; } @@ -9387,7 +9389,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s /* Consider allowing a small imbalance between NUMA groups */ if (env->sd->flags & SD_NUMA) { env->imbalance = adjust_numa_imbalance(env->imbalance, - busiest->sum_nr_running, busiest->group_weight); + local->sum_nr_running + 1, local->group_weight); } return; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7b4f4fbbb4048..14f273c295183 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2026,6 +2026,16 @@ static int push_rt_task(struct rq *rq, bool pull) return 0; retry: + /* + * It's possible that the next_task slipped in of + * higher priority than current. If that's the case + * just reschedule current. + */ + if (unlikely(next_task->prio < rq->curr->prio)) { + resched_curr(rq); + return 0; + } + if (is_migration_disabled(next_task)) { struct task_struct *push_task = NULL; int cpu; @@ -2033,6 +2043,18 @@ static int push_rt_task(struct rq *rq, bool pull) if (!pull || rq->push_busy) return 0; + /* + * Invoking find_lowest_rq() on anything but an RT task doesn't + * make sense. Per the above priority check, curr has to + * be of higher priority than next_task, so no need to + * reschedule when bailing out. + * + * Note that the stoppers are masqueraded as SCHED_FIFO + * (cf. sched_set_stop_task()), so we can't rely on rt_task(). + */ + if (rq->curr->sched_class != &rt_sched_class) + return 0; + cpu = find_lowest_rq(rq->curr); if (cpu == -1 || cpu == rq->cpu) return 0; @@ -2057,16 +2079,6 @@ static int push_rt_task(struct rq *rq, bool pull) if (WARN_ON(next_task == rq->curr)) return 0; - /* - * It's possible that the next_task slipped in of - * higher priority than current. If that's the case - * just reschedule current. - */ - if (unlikely(next_task->prio < rq->curr->prio)) { - resched_curr(rq); - return 0; - } - /* We might release rq lock */ get_task_struct(next_task); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index de53be9057390..9b33ba9c3c420 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2841,88 +2841,6 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ -#ifdef CONFIG_UCLAMP_TASK -unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id); - -/** - * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values. - * @rq: The rq to clamp against. Must not be NULL. - * @util: The util value to clamp. - * @p: The task to clamp against. Can be NULL if you want to clamp - * against @rq only. - * - * Clamps the passed @util to the max(@rq, @p) effective uclamp values. - * - * If sched_uclamp_used static key is disabled, then just return the util - * without any clamping since uclamp aggregation at the rq level in the fast - * path is disabled, rendering this operation a NOP. - * - * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It - * will return the correct effective uclamp value of the task even if the - * static key is disabled. 
- */ -static __always_inline -unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, - struct task_struct *p) -{ - unsigned long min_util = 0; - unsigned long max_util = 0; - - if (!static_branch_likely(&sched_uclamp_used)) - return util; - - if (p) { - min_util = uclamp_eff_value(p, UCLAMP_MIN); - max_util = uclamp_eff_value(p, UCLAMP_MAX); - - /* - * Ignore last runnable task's max clamp, as this task will - * reset it. Similarly, no need to read the rq's min clamp. - */ - if (rq->uclamp_flags & UCLAMP_FLAG_IDLE) - goto out; - } - - min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value)); - max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value)); -out: - /* - * Since CPU's {min,max}_util clamps are MAX aggregated considering - * RUNNABLE tasks with _different_ clamps, we can end up with an - * inversion. Fix it now when the clamps are applied. - */ - if (unlikely(min_util >= max_util)) - return min_util; - - return clamp(util, min_util, max_util); -} - -/* - * When uclamp is compiled in, the aggregation at rq level is 'turned off' - * by default in the fast path and only gets turned on once userspace performs - * an operation that requires it. - * - * Returns true if userspace opted-in to use uclamp and aggregation at rq level - * hence is active. - */ -static inline bool uclamp_is_used(void) -{ - return static_branch_likely(&sched_uclamp_used); -} -#else /* CONFIG_UCLAMP_TASK */ -static inline -unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, - struct task_struct *p) -{ - return util; -} - -static inline bool uclamp_is_used(void) -{ - return false; -} -#endif /* CONFIG_UCLAMP_TASK */ - #ifdef arch_scale_freq_capacity # ifndef arch_scale_freq_invariant # define arch_scale_freq_invariant() true @@ -3020,6 +2938,105 @@ static inline unsigned long cpu_util_rt(struct rq *rq) } #endif +#ifdef CONFIG_UCLAMP_TASK +unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id); + +/** + * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values. + * @rq: The rq to clamp against. Must not be NULL. + * @util: The util value to clamp. + * @p: The task to clamp against. Can be NULL if you want to clamp + * against @rq only. + * + * Clamps the passed @util to the max(@rq, @p) effective uclamp values. + * + * If sched_uclamp_used static key is disabled, then just return the util + * without any clamping since uclamp aggregation at the rq level in the fast + * path is disabled, rendering this operation a NOP. + * + * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It + * will return the correct effective uclamp value of the task even if the + * static key is disabled. + */ +static __always_inline +unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, + struct task_struct *p) +{ + unsigned long min_util = 0; + unsigned long max_util = 0; + + if (!static_branch_likely(&sched_uclamp_used)) + return util; + + if (p) { + min_util = uclamp_eff_value(p, UCLAMP_MIN); + max_util = uclamp_eff_value(p, UCLAMP_MAX); + + /* + * Ignore last runnable task's max clamp, as this task will + * reset it. Similarly, no need to read the rq's min clamp. 
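Two boundary rules of this function are easiest to see with numbers; the function body resumes just below. A standalone model (hypothetical values) of the clamp inversion, together with the uclamp_rq_is_capped() helper added further down:

#include <assert.h>

#define SCHED_CAPACITY_SCALE	1024

/* models the final steps of uclamp_rq_util_with() */
static unsigned long apply_clamps(unsigned long util,
				  unsigned long min_util, unsigned long max_util)
{
	if (min_util >= max_util)	/* clamp inversion: min wins */
		return min_util;
	return util < min_util ? min_util : util > max_util ? max_util : util;
}

/* models uclamp_rq_is_capped() */
static int rq_is_capped(unsigned long rq_util, unsigned long rq_max_clamp)
{
	return rq_max_clamp != SCHED_CAPACITY_SCALE && rq_util >= rq_max_clamp;
}

int main(void)
{
	assert(apply_clamps(700, 512, 256) == 512);	  /* inverted clamps */
	assert(apply_clamps(700, 0, 256) == 256);	  /* normal capping */
	assert(rq_is_capped(300, 256));			  /* throttled by max */
	assert(!rq_is_capped(300, SCHED_CAPACITY_SCALE)); /* no clamp set */
	return 0;
}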
+ */ + if (rq->uclamp_flags & UCLAMP_FLAG_IDLE) + goto out; + } + + min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value)); + max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value)); +out: + /* + * Since CPU's {min,max}_util clamps are MAX aggregated considering + * RUNNABLE tasks with _different_ clamps, we can end up with an + * inversion. Fix it now when the clamps are applied. + */ + if (unlikely(min_util >= max_util)) + return min_util; + + return clamp(util, min_util, max_util); +} + +/* Is the rq being capped/throttled by uclamp_max? */ +static inline bool uclamp_rq_is_capped(struct rq *rq) +{ + unsigned long rq_util; + unsigned long max_util; + + if (!static_branch_likely(&sched_uclamp_used)) + return false; + + rq_util = cpu_util_cfs(cpu_of(rq)) + cpu_util_rt(rq); + max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value); + + return max_util != SCHED_CAPACITY_SCALE && rq_util >= max_util; +} + +/* + * When uclamp is compiled in, the aggregation at rq level is 'turned off' + * by default in the fast path and only gets turned on once userspace performs + * an operation that requires it. + * + * Returns true if userspace opted-in to use uclamp and aggregation at rq level + * hence is active. + */ +static inline bool uclamp_is_used(void) +{ + return static_branch_likely(&sched_uclamp_used); +} +#else /* CONFIG_UCLAMP_TASK */ +static inline +unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, + struct task_struct *p) +{ + return util; +} + +static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; } + +static inline bool uclamp_is_used(void) +{ + return false; +} +#endif /* CONFIG_UCLAMP_TASK */ + #ifdef CONFIG_HAVE_SCHED_AVG_IRQ static inline unsigned long cpu_util_irq(struct rq *rq) { diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index a492f159624fa..dc765f3eff16f 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -291,6 +291,7 @@ COND_SYSCALL(mincore); COND_SYSCALL(madvise); COND_SYSCALL(process_madvise); COND_SYSCALL(process_mrelease); +COND_SYSCALL(pmadv_ksm); COND_SYSCALL(remap_file_pages); COND_SYSCALL(mbind); COND_SYSCALL(get_mempolicy); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 730ab56d9e92e..6cf4e5c36ec92 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -91,6 +91,9 @@ #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT) #include #endif +#ifdef CONFIG_USER_NS +#include +#endif #if defined(CONFIG_SYSCTL) @@ -1814,6 +1817,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_USER_NS + { + .procname = "unprivileged_userns_clone", + .data = &unprivileged_userns_clone, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 17a283ce2b20f..2d76c91b85de4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -169,6 +169,8 @@ static ktime_t tick_init_jiffy_update(void) return period; } +#define MAX_STALLED_JIFFIES 5 + static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) { int cpu = smp_processor_id(); @@ -196,6 +198,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) if (tick_do_timer_cpu == cpu) tick_do_update_jiffies64(now); + /* + * If jiffies update stalled for too long (timekeeper in stop_machine() + * or VMEXIT'ed for several msecs), force an update. 
+ */ + if (ts->last_tick_jiffies != jiffies) { + ts->stalled_jiffies = 0; + ts->last_tick_jiffies = READ_ONCE(jiffies); + } else { + if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) { + tick_do_update_jiffies64(now); + ts->stalled_jiffies = 0; + ts->last_tick_jiffies = READ_ONCE(jiffies); + } + } + if (ts->inidle) ts->got_idle_tick = 1; } @@ -768,7 +785,7 @@ static inline bool local_timer_softirq_pending(void) static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { - u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; + u64 basemono, next_tick, delta, expires; unsigned long basejiff; unsigned int seq; @@ -791,7 +808,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * minimal delta which brings us back to this place * immediately. Lather, rinse and repeat... */ - if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() || + if (rcu_needs_cpu() || arch_needs_cpu() || irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { @@ -802,10 +819,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * disabled this also looks at the next expiring * hrtimer. */ - next_tmr = get_next_timer_interrupt(basejiff, basemono); - ts->next_timer = next_tmr; - /* Take the next rcu event into account */ - next_tick = next_rcu < next_tmr ? next_rcu : next_tmr; + next_tick = get_next_timer_interrupt(basejiff, basemono); + ts->next_timer = next_tick; } /* @@ -984,6 +999,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) __tick_nohz_full_update_tick(ts, ktime_get()); } +/* + * A pending softirq outside an IRQ (or softirq disabled section) context + * should be waiting for ksoftirqd to handle it. Therefore we shouldn't + * reach here due to the need_resched() early check in can_stop_idle_tick(). + * + * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPUHP_TEARDOWN_CPU on the + * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, + * triggering the below since wakeup_softirqd() is ignored.
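The stall detector just added to tick_sched_do_timer() is small enough to model in full. A standalone sketch of its behavior (hypothetical driver loop): any jiffies advance resets the counter, and the fifth consecutive stalled tick forces an update.

#include <stdio.h>

#define MAX_STALLED_JIFFIES 5

static unsigned long jiffies;		/* stands in for the real counter */
static unsigned long last_tick_jiffies;
static unsigned int stalled_jiffies;

static void tick(int jiffies_advanced)
{
	jiffies += jiffies_advanced;

	if (last_tick_jiffies != jiffies) {
		stalled_jiffies = 0;
		last_tick_jiffies = jiffies;
	} else if (++stalled_jiffies == MAX_STALLED_JIFFIES) {
		printf("forcing jiffies update\n"); /* tick_do_update_jiffies64() */
		jiffies++;			    /* effect of the forced update */
		stalled_jiffies = 0;
		last_tick_jiffies = jiffies;
	}
}

int main(void)
{
	int i;

	tick(1);		/* normal tick: counter resets */
	for (i = 0; i < 5; i++)	/* timekeeper stuck: prints on the 5th call */
		tick(0);
	return 0;
}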
+ * + */ +static bool report_idle_softirq(void) +{ + static int ratelimit; + unsigned int pending = local_softirq_pending(); + + if (likely(!pending)) + return false; + + /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */ + if (!cpu_active(smp_processor_id())) { + pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK; + if (!pending) + return false; + } + + if (ratelimit >= 10) + return false; + + /* On RT, softirq handling may be waiting on some lock */ + if (local_bh_blocked()) + return false; + + pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", + pending); + ratelimit++; + + return true; +} + static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { /* @@ -1010,17 +1064,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (need_resched()) return false; - if (unlikely(local_softirq_pending())) { - static int ratelimit; - - if (ratelimit < 10 && !local_bh_blocked() && - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { - pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", - (unsigned int) local_softirq_pending()); - ratelimit++; - } + if (unlikely(report_idle_softirq())) return false; - } if (tick_nohz_full_enabled()) { /* diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index d952ae3934236..504649513399b 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -49,6 +49,8 @@ enum tick_nohz_mode { * @timer_expires_base: Base time clock monotonic for @timer_expires * @next_timer: Expiry time of next expiring timer for debugging purpose only * @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick + * @last_tick_jiffies: Value of jiffies seen on last tick + * @stalled_jiffies: Number of stalled jiffies detected across ticks */ struct tick_sched { struct hrtimer sched_timer; @@ -77,6 +79,8 @@ struct tick_sched { u64 next_timer; ktime_t idle_expires; atomic_t tick_dep_mask; + unsigned long last_tick_jiffies; + unsigned int stalled_jiffies; }; extern struct tick_sched *tick_get_tick_sched(int cpu); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index eb44418574f9c..96265a717ca4e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3663,12 +3663,17 @@ static char *trace_iter_expand_format(struct trace_iterator *iter) } /* Returns true if the string is safe to dereference from an event */ -static bool trace_safe_str(struct trace_iterator *iter, const char *str) +static bool trace_safe_str(struct trace_iterator *iter, const char *str, + bool star, int len) { unsigned long addr = (unsigned long)str; struct trace_event *trace_event; struct trace_event_call *event; + /* Ignore strings with no length */ + if (star && !len) + return true; + /* OK if part of the event data */ if ((addr >= (unsigned long)iter->ent) && (addr < (unsigned long)iter->ent + iter->ent_size)) @@ -3854,7 +3859,7 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, * instead. See samples/trace_events/trace-events-sample.h * for reference.
*/ - if (WARN_ONCE(!trace_safe_str(iter, str), + if (WARN_ONCE(!trace_safe_str(iter, str, star, len), "fmt: '%s' current_buffer: '%s'", fmt, show_buffer(&iter->seq))) { int ret; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3147614c1812a..25b5d0f9f3758 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -40,6 +40,14 @@ static LIST_HEAD(ftrace_generic_fields); static LIST_HEAD(ftrace_common_fields); static bool eventdir_initialized; +static LIST_HEAD(module_strings); + +struct module_string { + struct list_head next; + struct module *module; + char *str; +}; + #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO) static struct kmem_cache *field_cachep; @@ -2633,6 +2641,76 @@ static void update_event_printk(struct trace_event_call *call, } } +static void add_str_to_module(struct module *module, char *str) +{ + struct module_string *modstr; + + modstr = kmalloc(sizeof(*modstr), GFP_KERNEL); + + /* + * If we failed to allocate memory here, then we'll just + * let the str memory leak when the module is removed. + * If this fails to allocate, there's worse problems than + * a leaked string on module removal. + */ + if (WARN_ON_ONCE(!modstr)) + return; + + modstr->module = module; + modstr->str = str; + + list_add(&modstr->next, &module_strings); +} + +static void update_event_fields(struct trace_event_call *call, + struct trace_eval_map *map) +{ + struct ftrace_event_field *field; + struct list_head *head; + char *ptr; + char *str; + int len = strlen(map->eval_string); + + /* Dynamic events should never have field maps */ + if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC)) + return; + + head = trace_get_fields(call); + list_for_each_entry(field, head, link) { + ptr = strchr(field->type, '['); + if (!ptr) + continue; + ptr++; + + if (!isalpha(*ptr) && *ptr != '_') + continue; + + if (strncmp(map->eval_string, ptr, len) != 0) + continue; + + str = kstrdup(field->type, GFP_KERNEL); + if (WARN_ON_ONCE(!str)) + return; + ptr = str + (ptr - field->type); + ptr = eval_replace(ptr, map, len); + /* enum/sizeof string smaller than value */ + if (WARN_ON_ONCE(!ptr)) { + kfree(str); + continue; + } + + /* + * If the event is part of a module, then we need to free the string + * when the module is removed. Otherwise, it will stay allocated + * until a reboot. 
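In miniature, update_event_fields() rewrites a field type such as "char buf[EVENT_MAX]" so that userspace parsers still see a usable array bound once the enum name is all that survives compilation, splicing the mapped value into a freshly allocated copy. A standalone sketch of the string surgery (hypothetical names; in the kernel, eval_replace() does this splice and the checks are stricter):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* replace an enum name inside "type[NAME]" with its numeric value */
static char *rewrite_field_type(const char *type, const char *name, long val)
{
	const char *open = strchr(type, '[');
	char *out;

	if (!open || strncmp(open + 1, name, strlen(name)) != 0)
		return NULL;

	if (asprintf(&out, "%.*s%ld%s", (int)(open + 1 - type), type,
		     val, open + 1 + strlen(name)) < 0)
		return NULL;
	return out;
}

int main(void)
{
	/* as if TRACE_DEFINE_ENUM(EVENT_MAX) mapped EVENT_MAX to 32 */
	char *fixed = rewrite_field_type("char buf[EVENT_MAX]", "EVENT_MAX", 32);

	if (fixed)
		printf("%s\n", fixed);	/* "char buf[32]" */
	free(fixed);
	return 0;
}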
+ */ + if (call->module) + add_str_to_module(call->module, str); + + field->type = str; + } +} + void trace_event_eval_update(struct trace_eval_map **map, int len) { struct trace_event_call *call, *p; @@ -2668,6 +2746,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) first = false; } update_event_printk(call, map[i]); + update_event_fields(call, map[i]); } } } @@ -2853,6 +2932,7 @@ static void trace_module_add_events(struct module *mod) static void trace_module_remove_events(struct module *mod) { struct trace_event_call *call, *p; + struct module_string *modstr, *m; down_write(&trace_event_sem); list_for_each_entry_safe(call, p, &ftrace_events, list) { @@ -2861,6 +2941,14 @@ static void trace_module_remove_events(struct module *mod) if (call->module == mod) __trace_remove_event_call(call); } + /* Check for any strings allocated for this module */ + list_for_each_entry_safe(modstr, m, &module_strings, next) { + if (modstr->module != mod) + continue; + list_del(&modstr->next); + kfree(modstr->str); + kfree(modstr); + } up_write(&trace_event_sem); /* diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 5481ba44a8d68..423ab2563ad75 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -21,6 +21,13 @@ #include #include +/* sysctl */ +#ifdef CONFIG_USER_NS_UNPRIVILEGED +int unprivileged_userns_clone = 1; +#else +int unprivileged_userns_clone; +#endif + static struct kmem_cache *user_ns_cachep __read_mostly; static DEFINE_MUTEX(userns_state_mutex); diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 00703444a2194..230038d4f9081 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -271,7 +271,7 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes) return 0; error_p: - for (i = 0; i < nr_pages; i++) + while (--i >= 0) __free_page(pages[i]); kfree(pages); error: @@ -370,6 +370,7 @@ static void __put_watch_queue(struct kref *kref) for (i = 0; i < wqueue->nr_pages; i++) __free_page(wqueue->notes[i]); + kfree(wqueue->notes); bitmap_free(wqueue->notes_bitmap); wfilter = rcu_access_pointer(wqueue->filter); @@ -395,6 +396,7 @@ static void free_watch(struct rcu_head *rcu) put_watch_queue(rcu_access_pointer(watch->queue)); atomic_dec(&watch->cred->user->nr_watches); put_cred(watch->cred); + kfree(watch); } static void __put_watch(struct kref *kref) diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c index be38a2c5ecc2b..42825941f19f2 100644 --- a/lib/kunit/try-catch.c +++ b/lib/kunit/try-catch.c @@ -52,7 +52,7 @@ static unsigned long kunit_test_timeout(void) * If tests timeout due to exceeding sysctl_hung_task_timeout_secs, * the task will be killed and an oops generated. */ - return 300 * MSEC_PER_SEC; /* 5 min */ + return 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */ } void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context) diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index a4c7cd74cff58..4fb7700a741bd 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -4,6 +4,8 @@ # from userspace. # +pound := \# + CC = gcc OPTFLAGS = -O2 # Adjust as desired CFLAGS = -I..
-I ../../../include -g $(OPTFLAGS) @@ -42,7 +44,7 @@ else ifeq ($(HAS_NEON),yes) OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else - HAS_ALTIVEC := $(shell printf '\#include \nvector int a;\n' |\ + HAS_ALTIVEC := $(shell printf '$(pound)include \nvector int a;\n' |\ gcc -c -x c - >/dev/null && rm ./-.o && echo yes) ifeq ($(HAS_ALTIVEC),yes) CFLAGS += -I../../../arch/powerpc/include diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index a3cf071941ab4..841a55242abaa 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -19,7 +19,6 @@ #define NDISKS 16 /* Including P and Q */ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); -struct raid6_calls raid6_call; char *dataptrs[NDISKS]; char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); diff --git a/lib/test_kmod.c b/lib/test_kmod.c index ce15893914131..cb800b1d0d99c 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -1149,6 +1149,7 @@ static struct kmod_test_device *register_test_dev_kmod(void) if (ret) { pr_err("could not register misc device: %d\n", ret); free_test_dev_kmod(test_dev); + test_dev = NULL; goto out; } diff --git a/lib/test_lockup.c b/lib/test_lockup.c index 906b598740a7b..c3fd87d6c2dd0 100644 --- a/lib/test_lockup.c +++ b/lib/test_lockup.c @@ -417,9 +417,14 @@ static bool test_kernel_ptr(unsigned long addr, int size) return false; /* should be at least readable kernel address */ - if (access_ok(ptr, 1) || - access_ok(ptr + size - 1, 1) || - get_kernel_nofault(buf, ptr) || + if (!IS_ENABLED(CONFIG_ALTERNATE_USER_ADDRESS_SPACE) && + (access_ok((void __user *)ptr, 1) || + access_ok((void __user *)ptr + size - 1, 1))) { + pr_err("user space ptr invalid in kernel: %#lx\n", addr); + return true; + } + + if (get_kernel_nofault(buf, ptr) || get_kernel_nofault(buf, ptr + size - 1)) { pr_err("invalid kernel ptr: %#lx\n", addr); return true; diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 8b1c318189ce8..e77d4856442c3 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1463,6 +1463,25 @@ static noinline void check_create_range_4(struct xarray *xa, XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_create_range_5(struct xarray *xa, + unsigned long index, unsigned int order) +{ + XA_STATE_ORDER(xas, xa, index, order); + unsigned int i; + + xa_store_order(xa, index, order, xa_mk_index(index), GFP_KERNEL); + + for (i = 0; i < order + 10; i++) { + do { + xas_lock(&xas); + xas_create_range(&xas); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + } + + xa_destroy(xa); +} + static noinline void check_create_range(struct xarray *xa) { unsigned int order; @@ -1490,6 +1509,9 @@ static noinline void check_create_range(struct xarray *xa) check_create_range_4(xa, (3U << order) + 1, order); check_create_range_4(xa, (3U << order) - 1, order); check_create_range_4(xa, (1U << 24) + 1, order); + + check_create_range_5(xa, 0, order); + check_create_range_5(xa, (1U << order), order); } check_create_range_3(); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3b8129dd374cd..fbf261bbea950 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -49,10 +49,15 @@ #include /* for PAGE_SIZE */ #include /* cpu_to_le16 */ +#include #include #include "kstrtox.h" +/* Disable pointer hashing if requested */ +bool no_hash_pointers __ro_after_init; +EXPORT_SYMBOL_GPL(no_hash_pointers); + static noinline unsigned long long simple_strntoull(const char *startp, size_t max_chars, char **endp, unsigned int 
base) { const char *cp; @@ -848,6 +853,19 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, return pointer_string(buf, end, (const void *)hashval, spec); } +static char *default_pointer(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + /* + * default is to _not_ leak addresses, so hash before printing, + * unless no_hash_pointers is specified on the command line. + */ + if (unlikely(no_hash_pointers)) + return pointer_string(buf, end, ptr, spec); + + return ptr_to_id(buf, end, ptr, spec); +} + int kptr_restrict __read_mostly; static noinline_for_stack @@ -857,7 +875,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr, switch (kptr_restrict) { case 0: /* Handle as %p, hash and do _not_ leak addresses. */ - return ptr_to_id(buf, end, ptr, spec); + return default_pointer(buf, end, ptr, spec); case 1: { const struct cred *cred; @@ -1761,7 +1779,7 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc, char output[sizeof("0123 little-endian (0x01234567)")]; char *p = output; unsigned int i; - u32 val; + u32 orig, val; if (fmt[1] != 'c' || fmt[2] != 'c') return error_string(buf, end, "(%p4?)", spec); @@ -1769,21 +1787,22 @@ char *fourcc_string(char *buf, char *end, const u32 *fourcc, if (check_pointer(&buf, end, fourcc, spec)) return buf; - val = *fourcc & ~BIT(31); + orig = get_unaligned(fourcc); + val = orig & ~BIT(31); - for (i = 0; i < sizeof(*fourcc); i++) { + for (i = 0; i < sizeof(u32); i++) { unsigned char c = val >> (i * 8); /* Print non-control ASCII characters as-is, dot otherwise */ *p++ = isascii(c) && isprint(c) ? c : '.'; } - strcpy(p, *fourcc & BIT(31) ? " big-endian" : " little-endian"); + strcpy(p, orig & BIT(31) ? " big-endian" : " little-endian"); p += strlen(p); *p++ = ' '; *p++ = '('; - p = special_hex_number(p, output + sizeof(output) - 2, *fourcc, sizeof(u32)); + p = special_hex_number(p, output + sizeof(output) - 2, orig, sizeof(u32)); *p++ = ')'; *p = '\0'; @@ -2223,10 +2242,6 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } -/* Disable pointer hashing if requested */ -bool no_hash_pointers __ro_after_init; -EXPORT_SYMBOL_GPL(no_hash_pointers); - int __init no_hash_pointers_enable(char *str) { if (no_hash_pointers) @@ -2455,7 +2470,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'e': /* %pe with a non-ERR_PTR gets treated as plain %p */ if (!IS_ERR(ptr)) - break; + return default_pointer(buf, end, ptr, spec); return err_ptr(buf, end, ptr, spec); case 'u': case 'k': @@ -2465,16 +2480,9 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, default: return error_string(buf, end, "(einval)", spec); } + default: + return default_pointer(buf, end, ptr, spec); } - - /* - * default is to _not_ leak addresses, so hash before printing, - * unless no_hash_pointers is specified on the command line. 
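For orientation, the output the fixed fourcc_string() produces once the value is fetched with get_unaligned(). An illustrative in-kernel use (hypothetical value):

	u32 fourcc = 0x34325258;	/* "XR24", DRM_FORMAT_XRGB8888 */

	pr_info("format: %p4cc\n", &fourcc);
	/* -> format: XR24 little-endian (0x34325258) */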
- */ - if (unlikely(no_hash_pointers)) - return pointer_string(buf, end, ptr, spec); - else - return ptr_to_id(buf, end, ptr, spec); } /* diff --git a/lib/xarray.c b/lib/xarray.c index 6f47f6375808a..88ca87435e3da 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -722,6 +722,8 @@ void xas_create_range(struct xa_state *xas) for (;;) { struct xa_node *node = xas->xa_node; + if (node->shift >= shift) + break; xas->xa_node = xa_parent_locked(xas->xa, node); xas->xa_offset = node->offset - 1; if (node->offset != 0) @@ -1079,6 +1081,7 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order) xa_mk_node(child)); if (xa_is_value(curr)) values--; + xas_update(xas, child); } else { unsigned int canon = offset - xas->xa_sibs; @@ -1093,6 +1096,7 @@ void xas_split(struct xa_state *xas, void *entry, unsigned int order) } while (offset-- > xas->xa_offset); node->nr_values += values; + xas_update(xas, node); } EXPORT_SYMBOL_GPL(xas_split); #endif diff --git a/mm/Kconfig b/mm/Kconfig index 3326ee3903f33..7fd84e0384dc6 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -892,6 +892,32 @@ config ANON_VMA_NAME area from being merged with adjacent virtual memory areas due to the difference in their name. +# the multi-gen LRU { +config LRU_GEN + bool "Multi-Gen LRU" + depends on MMU + # the following options can use up the spare bits in page flags + depends on !MAXSMP && (64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP) + help + A high performance LRU implementation for memory overcommit. See + Documentation/admin-guide/mm/multigen_lru.rst for details. + +config LRU_GEN_ENABLED + bool "Enable by default" + depends on LRU_GEN + help + This option enables the multi-gen LRU by default. + +config LRU_GEN_STATS + bool "Full stats for debugging" + depends on LRU_GEN + help + Do not enable this option unless you plan to look at historical stats + from evicted generations for debugging purposes. + + This option has a per-memcg and per-node memory overhead.
+# } + source "mm/damon/Kconfig" endmenu diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 406a3c28c0266..3df389fd307fb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2364,7 +2364,8 @@ static void __split_huge_page_tail(struct page *head, int tail, #ifdef CONFIG_64BIT (1L << PG_arch_2) | #endif - (1L << PG_dirty))); + (1L << PG_dirty) | + LRU_GEN_MASK | LRU_REFS_MASK)); /* ->mapping in first tail page is compound_mapcount */ VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 7580baa76af1c..acd7cbb82e160 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -796,6 +796,8 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) unsigned long flags; struct kmemleak_object *object; struct kmemleak_scan_area *area = NULL; + unsigned long untagged_ptr; + unsigned long untagged_objp; object = find_and_get_object(ptr, 1); if (!object) { @@ -804,6 +806,9 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) return; } + untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); + untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer); + if (scan_area_cache) area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp)); @@ -815,8 +820,8 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) goto out_unlock; } if (size == SIZE_MAX) { - size = object->pointer + object->size - ptr; - } else if (ptr + size > object->pointer + object->size) { + size = untagged_objp + object->size - untagged_ptr; + } else if (untagged_ptr + size > untagged_objp + object->size) { kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr); dump_object_info(object); kmem_cache_free(scan_area_cache, area); diff --git a/mm/ksm.c b/mm/ksm.c index c20bd4d9a0d9e..cc8d9ddb304af 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2432,54 +2432,78 @@ static int ksm_scan_thread(void *nothing) return 0; } -int ksm_madvise(struct vm_area_struct *vma, unsigned long start, - unsigned long end, int advice, unsigned long *vm_flags) +int ksm_madvise_merge(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *vm_flags) { - struct mm_struct *mm = vma->vm_mm; int err; - switch (advice) { - case MADV_MERGEABLE: - /* - * Be somewhat over-protective for now! - */ - if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_HUGETLB | VM_MIXEDMAP)) - return 0; /* just ignore the advice */ + /* + * Be somewhat over-protective for now! 
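The kmemleak hunk above is entirely about pointer tags. A standalone model (hypothetical tag values, arm64-style top-byte tags, assumes a 64-bit build) of why the size arithmetic has to use untagged addresses:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static uintptr_t reset_tag(uintptr_t p)	/* like kasan_reset_tag() */
{
	return p & ~(0xffULL << 56);
}

int main(void)
{
	uintptr_t objp = (0xf2ULL << 56) | 0x1000; /* tagged object start */
	uintptr_t ptr  = (0x8aULL << 56) | 0x1010; /* tagged pointer into it */
	size_t obj_size = 0x100;

	/* the old form mixes tags, so the result is nonsense */
	assert(objp + obj_size - ptr != 0xf0);

	/* the fixed form strips both tags first */
	assert(reset_tag(objp) + obj_size - reset_tag(ptr) == 0xf0);
	return 0;
}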
+ */ + if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_HUGETLB | VM_MIXEDMAP)) + return 0; /* just ignore the advice */ - if (vma_is_dax(vma)) - return 0; + if (vma_is_dax(vma)) + return 0; #ifdef VM_SAO if (*vm_flags & VM_SAO) return 0; #endif #ifdef VM_SPARC_ADI - if (*vm_flags & VM_SPARC_ADI) - return 0; + if (*vm_flags & VM_SPARC_ADI) + return 0; #endif - if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { - err = __ksm_enter(mm); - if (err) - return err; - } + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { + err = __ksm_enter(mm); + if (err) + return err; + } - *vm_flags |= VM_MERGEABLE; - break; + *vm_flags |= VM_MERGEABLE; - case MADV_UNMERGEABLE: - if (!(*vm_flags & VM_MERGEABLE)) - return 0; /* just ignore the advice */ + return 0; +} - if (vma->anon_vma) { - err = unmerge_ksm_pages(vma, start, end); - if (err) - return err; - } +int ksm_madvise_unmerge(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long *vm_flags) +{ + int err; + + if (!(*vm_flags & VM_MERGEABLE)) + return 0; /* just ignore the advice */ + + if (vma->anon_vma) { + err = unmerge_ksm_pages(vma, start, end); + if (err) + return err; + } - *vm_flags &= ~VM_MERGEABLE; + *vm_flags &= ~VM_MERGEABLE; + + return 0; +} + +int ksm_madvise(struct vm_area_struct *vma, unsigned long start, + unsigned long end, int advice, unsigned long *vm_flags) +{ + struct mm_struct *mm = vma->vm_mm; + int err; + + switch (advice) { + case MADV_MERGEABLE: + err = ksm_madvise_merge(mm, vma, vm_flags); + if (err) + return err; + break; + + case MADV_UNMERGEABLE: + err = ksm_madvise_unmerge(vma, start, end, vm_flags); + if (err) + return err; break; } diff --git a/mm/madvise.c b/mm/madvise.c index 38d0f515d5486..8593fc3a06f3c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1433,8 +1433,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, iov_iter_advance(&iter, iovec.iov_len); } - if (ret == 0) - ret = total_len - iov_iter_count(&iter); + ret = (total_len - iov_iter_count(&iter)) ? : ret; release_mm: mmput(mm); @@ -1445,3 +1444,114 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, out: return ret; } + +SYSCALL_DEFINE3(pmadv_ksm, int, pidfd, int, behaviour, unsigned int, flags) +{ +#ifdef CONFIG_KSM + ssize_t ret; + struct pid *pid; + struct task_struct *task; + struct mm_struct *mm; + unsigned int f_flags; + struct vm_area_struct *vma; + + if (flags != 0) { + ret = -EINVAL; + goto out; + } + + switch (behaviour) { + case MADV_MERGEABLE: + case MADV_UNMERGEABLE: + break; + default: + ret = -EINVAL; + goto out; + break; + } + + pid = pidfd_get_pid(pidfd, &f_flags); + if (IS_ERR(pid)) { + ret = PTR_ERR(pid); + goto out; + } + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) { + ret = -ESRCH; + goto put_pid; + } + + /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */ + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); + if (IS_ERR_OR_NULL(mm)) { + ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; + goto release_task; + } + + /* Require CAP_SYS_NICE for influencing process performance. 
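For orientation while the syscall body continues below, this is how the new call would be used from userspace (hypothetical demo; the syscall number is discovered through the sysfs file exported at the end of this patch, and SYS_pidfd_open needs a reasonably recent libc):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	FILE *f = fopen("/sys/kernel/pmadv/ksm", "r");
	long nr;
	int pidfd;

	if (argc < 2 || !f || fscanf(f, "%ld", &nr) != 1)
		return 1;		/* kernel without this patch */
	fclose(f);

	pidfd = syscall(SYS_pidfd_open, (pid_t)atol(argv[1]), 0);
	if (pidfd < 0)
		return 1;

	/* flags must be 0; marks every VMA of the target mergeable */
	if (syscall(nr, pidfd, MADV_MERGEABLE, 0) != 0)
		perror("pmadv_ksm");
	close(pidfd);
	return 0;
}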
*/ + if (!capable(CAP_SYS_NICE)) { + ret = -EPERM; + goto release_mm; + } + + if (mmap_write_lock_killable(mm)) { + ret = -EINTR; + goto release_mm; + } + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + switch (behaviour) { + case MADV_MERGEABLE: + ret = ksm_madvise_merge(vma->vm_mm, vma, &vma->vm_flags); + break; + case MADV_UNMERGEABLE: + ret = ksm_madvise_unmerge(vma, vma->vm_start, vma->vm_end, &vma->vm_flags); + break; + default: + /* look, ma, no brain */ + break; + } + if (ret) + break; + } + + mmap_write_unlock(mm); + +release_mm: + mmput(mm); +release_task: + put_task_struct(task); +put_pid: + put_pid(pid); +out: + return ret; +#else /* CONFIG_KSM */ + return -ENOSYS; +#endif /* CONFIG_KSM */ +} + +#ifdef CONFIG_KSM +static ssize_t ksm_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", __NR_pmadv_ksm); +} +static struct kobj_attribute pmadv_ksm_attr = __ATTR_RO(ksm); + +static struct attribute *pmadv_sysfs_attrs[] = { + &pmadv_ksm_attr.attr, + NULL, +}; + +static const struct attribute_group pmadv_sysfs_attr_group = { + .attrs = pmadv_sysfs_attrs, + .name = "pmadv", +}; + +static int __init pmadv_sysfs_init(void) +{ + return sysfs_create_group(kernel_kobj, &pmadv_sysfs_attr_group); +} +subsys_initcall(pmadv_sysfs_init); +#endif /* CONFIG_KSM */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 36e9f38c919d0..1feb89ac0170c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2744,6 +2744,7 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg) * - LRU isolation * - lock_page_memcg() * - exclusive reference + * - mem_cgroup_trylock_pages() */ folio->memcg_data = (unsigned long)memcg; } @@ -5121,6 +5122,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) static void mem_cgroup_free(struct mem_cgroup *memcg) { + lru_gen_exit_memcg(memcg); memcg_wb_domain_exit(memcg); __mem_cgroup_free(memcg); } @@ -5180,6 +5182,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) memcg->deferred_split_queue.split_queue_len = 0; #endif idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); + lru_gen_init_memcg(memcg); return memcg; fail: mem_cgroup_id_remove(memcg); @@ -6152,6 +6155,29 @@ static void mem_cgroup_move_task(void) } #endif +#ifdef CONFIG_LRU_GEN +static void mem_cgroup_attach(struct cgroup_taskset *tset) +{ + struct cgroup_subsys_state *css; + struct task_struct *task = NULL; + + cgroup_taskset_for_each_leader(task, css, tset) + break; + + if (!task) + return; + + task_lock(task); + if (task->mm && task->mm->owner == task) + lru_gen_migrate_mm(task->mm); + task_unlock(task); +} +#else +static void mem_cgroup_attach(struct cgroup_taskset *tset) +{ +} +#endif /* CONFIG_LRU_GEN */ + static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value) { if (value == PAGE_COUNTER_MAX) @@ -6497,6 +6523,7 @@ struct cgroup_subsys memory_cgrp_subsys = { .css_reset = mem_cgroup_css_reset, .css_rstat_flush = mem_cgroup_css_rstat_flush, .can_attach = mem_cgroup_can_attach, + .attach = mem_cgroup_attach, .cancel_attach = mem_cgroup_cancel_attach, .post_attach = mem_cgroup_move_task, .dfl_cftypes = memory_files, @@ -7053,7 +7080,7 @@ static int __init cgroup_memory(char *s) if (!strcmp(token, "nokmem")) cgroup_memory_nokmem = true; } - return 0; + return 1; } __setup("cgroup.memory=", cgroup_memory); diff --git a/mm/memory.c b/mm/memory.c index c125c4969913a..3cb140550cf41 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -122,18 +122,6 @@ int randomize_va_space __read_mostly = 2; #endif -#ifndef 
arch_faults_on_old_pte -static inline bool arch_faults_on_old_pte(void) -{ - /* - * Those arches which don't have hw access flag feature need to - * implement their own helper. By default, "true" means pagefault - * will be hit on old pte. - */ - return true; -} -#endif - #ifndef arch_wants_old_prefaulted_pte static inline bool arch_wants_old_prefaulted_pte(void) { @@ -1313,6 +1301,17 @@ struct zap_details { struct folio *single_folio; /* Locked folio to be unmapped */ }; +/* Whether we should zap all COWed (private) pages too */ +static inline bool should_zap_cows(struct zap_details *details) +{ + /* By default, zap all pages */ + if (!details) + return true; + + /* Or, we zap COWed pages only if the caller wants to */ + return !details->zap_mapping; +} + /* * We set details->zap_mapping when we want to unmap shared but keep private * pages. Return true if skip zapping this page, false otherwise. @@ -1320,11 +1319,15 @@ struct zap_details { static inline bool zap_skip_check_mapping(struct zap_details *details, struct page *page) { - if (!details || !page) + /* If we can make a decision without *page.. */ + if (should_zap_cows(details)) + return false; + + /* E.g. the caller passes NULL for the case of a zero page */ + if (!page) return false; - return details->zap_mapping && - (details->zap_mapping != page_rmapping(page)); + return details->zap_mapping != page_rmapping(page); } static unsigned long zap_pte_range(struct mmu_gather *tlb, @@ -1405,17 +1408,24 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, continue; } - /* If details->check_mapping, we leave swap entries. */ - if (unlikely(details)) - continue; - - if (!non_swap_entry(entry)) + if (!non_swap_entry(entry)) { + /* Genuine swap entry, hence a private anon page */ + if (!should_zap_cows(details)) + continue; rss[MM_SWAPENTS]--; - else if (is_migration_entry(entry)) { + } else if (is_migration_entry(entry)) { struct page *page; page = pfn_swap_entry_to_page(entry); + if (zap_skip_check_mapping(details, page)) + continue; rss[mm_counter(page)]--; + } else if (is_hwpoison_entry(entry)) { + if (!should_zap_cows(details)) + continue; + } else { + /* We should have covered all the swap entry types */ + WARN_ON_ONCE(1); } if (unlikely(!free_swap_and_cache(entry))) print_bad_pte(vma, addr, ptent, NULL); @@ -2778,7 +2788,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src, * On architectures with software "accessed" bits, we would * take a double page fault, so mark it accessed here. */ - if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { + if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) { pte_t entry; vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); @@ -3871,11 +3881,20 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) return ret; if (unlikely(PageHWPoison(vmf->page))) { - if (ret & VM_FAULT_LOCKED) - unlock_page(vmf->page); - put_page(vmf->page); + struct page *page = vmf->page; + vm_fault_t poisonret = VM_FAULT_HWPOISON; + if (ret & VM_FAULT_LOCKED) { + if (page_mapped(page)) + unmap_mapping_pages(page_mapping(page), + page->index, 1, false); + /* Retry if a clean page was removed from the cache. 
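Backing up to the should_zap_cows()/zap_skip_check_mapping() rework earlier in this file, the new policy reduces to a small decision table. A deliberately simplified standalone model (mapping pointers stand in for struct address_space, and anon pages fall under "other mapping"): no details means zap everything; with details->zap_mapping set, other mappings' pages, private COWed pages, and their swap entries are kept, while the zero page is still zapped.

#include <assert.h>
#include <stddef.h>

struct zap_details { void *zap_mapping; };

static int should_zap_cows(struct zap_details *details)
{
	return !details || !details->zap_mapping;
}

static int skip_page(struct zap_details *details, void *page_mapping)
{
	if (should_zap_cows(details))
		return 0;		/* zap everything */
	if (!page_mapping)
		return 0;		/* e.g. the zero page */
	return details->zap_mapping != page_mapping;
}

int main(void)
{
	int file_a, file_b;
	struct zap_details d = { .zap_mapping = &file_a };

	assert(!skip_page(NULL, &file_a));	/* no details: full zap */
	assert(!skip_page(&d, &file_a));	/* the named mapping: zap */
	assert(skip_page(&d, &file_b));		/* other mapping: keep */
	assert(!skip_page(&d, NULL));		/* zero page: still zap */
	return 0;
}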
*/ + if (invalidate_inode_page(page)) + poisonret = VM_FAULT_NOPAGE; + unlock_page(page); + } + put_page(page); vmf->page = NULL; - return VM_FAULT_HWPOISON; + return poisonret; } if (unlikely(!(ret & VM_FAULT_LOCKED))) @@ -4766,6 +4785,27 @@ static inline void mm_account_fault(struct pt_regs *regs, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } +#ifdef CONFIG_LRU_GEN +static void lru_gen_enter_fault(struct vm_area_struct *vma) +{ + /* the LRU algorithm doesn't apply to sequential or random reads */ + current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)); +} + +static void lru_gen_exit_fault(void) +{ + current->in_lru_fault = false; +} +#else +static void lru_gen_enter_fault(struct vm_area_struct *vma) +{ +} + +static void lru_gen_exit_fault(void) +{ +} +#endif /* CONFIG_LRU_GEN */ + /* * By the time we get here, we already hold the mm semaphore * @@ -4797,11 +4837,15 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (flags & FAULT_FLAG_USER) mem_cgroup_enter_user_fault(); + lru_gen_enter_fault(vma); + if (unlikely(is_vm_hugetlb_page(vma))) ret = hugetlb_fault(vma->vm_mm, vma, address, flags); else ret = __handle_mm_fault(vma, address, flags); + lru_gen_exit_fault(); + if (flags & FAULT_FLAG_USER) { mem_cgroup_exit_user_fault(); /* diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 69284d3b5e53f..1628cd90d9fcc 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -786,7 +786,6 @@ static int vma_replace_policy(struct vm_area_struct *vma, static int mbind_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct mempolicy *new_pol) { - struct vm_area_struct *next; struct vm_area_struct *prev; struct vm_area_struct *vma; int err = 0; @@ -801,8 +800,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (start > vma->vm_start) prev = vma; - for (; vma && vma->vm_start < end; prev = vma, vma = next) { - next = vma->vm_next; + for (; vma && vma->vm_start < end; prev = vma, vma = vma->vm_next) { vmstart = max(start, vma->vm_start); vmend = min(end, vma->vm_end); @@ -817,10 +815,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, anon_vma_name(vma)); if (prev) { vma = prev; - next = vma->vm_next; - if (mpol_equal(vma_policy(vma), new_pol)) - continue; - /* vma_merge() joined vma && vma->next, case 8 */ goto replace; } if (vma->vm_start != vmstart) { diff --git a/mm/migrate.c b/mm/migrate.c index c7da064b4781b..086a366374678 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -3190,7 +3190,7 @@ static void __set_migration_target_nodes(void) /* * For callers that do not hold get_online_mems() already. */ -static void set_migration_target_nodes(void) +void set_migration_target_nodes(void) { get_online_mems(); __set_migration_target_nodes(); @@ -3254,51 +3254,24 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self, return notifier_from_errno(0); } -/* - * React to hotplug events that might affect the migration targets - * like events that online or offline NUMA nodes. - * - * The ordering is also currently dependent on which nodes have - * CPUs. That means we need CPU on/offline notification too. 
- */ -static int migration_online_cpu(unsigned int cpu) -{ - set_migration_target_nodes(); - return 0; -} -
-static int migration_offline_cpu(unsigned int cpu) +void __init migrate_on_reclaim_init(void) { - set_migration_target_nodes(); - return 0; -} -
-static int __init migrate_on_reclaim_init(void) -{ - int ret; - node_demotion = kmalloc_array(nr_node_ids, sizeof(struct demotion_nodes), GFP_KERNEL); WARN_ON(!node_demotion); - ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline", - NULL, migration_offline_cpu); + hotplug_memory_notifier(migrate_on_reclaim_callback, 100); /* - * In the unlikely case that this fails, the automatic - * migration targets may become suboptimal for nodes - * where N_CPU changes. With such a small impact in a - * rare case, do not bother trying to do anything special. + * At this point, all numa nodes with memory/CPUs have their state + * properly set, so we can build the demotion order now. + * Let us hold the cpu_hotplug lock just in case, as we could possibly have + * CPU hotplug events during boot. */ - WARN_ON(ret < 0); - ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online", - migration_online_cpu, NULL); - WARN_ON(ret < 0); - - hotplug_memory_notifier(migrate_on_reclaim_callback, 100); - return 0; + cpus_read_lock(); + set_migration_target_nodes(); + cpus_read_unlock(); } -late_initcall(migrate_on_reclaim_init); #endif /* CONFIG_HOTPLUG_CPU */ bool numa_demotion_enabled = false;
diff --git a/mm/mlock.c b/mm/mlock.c index 25934e7db3e10..37f969ec68fa4 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -827,13 +827,12 @@ int user_shm_lock(size_t size, struct ucounts *ucounts) locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; lock_limit = rlimit(RLIMIT_MEMLOCK); - if (lock_limit == RLIM_INFINITY) - allowed = 1; - lock_limit >>= PAGE_SHIFT; + if (lock_limit != RLIM_INFINITY) + lock_limit >>= PAGE_SHIFT; spin_lock(&shmlock_user_lock); memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); - if (!allowed && (memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) { + if ((memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) { dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); goto out; }
diff --git a/mm/mm_init.c b/mm/mm_init.c index 9ddaf0e1b0ab9..0d7b2bd2454a1 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -65,14 +65,16 @@ void __init mminit_verify_pageflags_layout(void) shift = 8 * sizeof(unsigned long); width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH - - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH; + - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH; mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths", - "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n", + "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n", SECTIONS_WIDTH, NODES_WIDTH, ZONES_WIDTH, LAST_CPUPID_WIDTH, KASAN_TAG_WIDTH, + LRU_GEN_WIDTH, + LRU_REFS_WIDTH, NR_PAGEFLAGS); mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts", "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n",
diff --git a/mm/mmap.c b/mm/mmap.c index f61a15474dd6d..18875c216f8db 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2557,7 +2557,7 @@ static int __init cmdline_parse_stack_guard_gap(char *p) if (!*endptr) stack_guard_gap = val << PAGE_SHIFT; - return 0; + return 1; } __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
diff --git a/mm/mmzone.c b/mm/mmzone.c index eb89d6e018e29..2ec0d77934244 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -81,6 +81,8 @@
void lruvec_init(struct lruvec *lruvec) for_each_lru(lru) INIT_LIST_HEAD(&lruvec->lists[lru]); + + lru_gen_init_lruvec(lruvec); } #if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3589febc6d319..a1fbf656e7dbd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7972,10 +7972,17 @@ static void __init find_zone_movable_pfns_for_nodes(void) out2: /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */ - for (nid = 0; nid < MAX_NUMNODES; nid++) + for (nid = 0; nid < MAX_NUMNODES; nid++) { + unsigned long start_pfn, end_pfn; + zone_movable_pfn[nid] = roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + if (zone_movable_pfn[nid] >= end_pfn) + zone_movable_pfn[nid] = 0; + } + out: /* restore the node_state */ node_states[N_MEMORY] = saved_node_state; diff --git a/mm/rmap.c b/mm/rmap.c index 6a1e8c7f62136..112e77dc62f41 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -73,6 +73,7 @@ #include #include #include +#include #include @@ -819,6 +820,12 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma, } if (pvmw.pte) { + if (lru_gen_enabled() && pte_young(*pvmw.pte) && + !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) { + lru_gen_look_around(&pvmw); + referenced++; + } + if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) { /* diff --git a/mm/slab.c b/mm/slab.c index ddf5737c63d90..a36af26e15216 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3421,6 +3421,7 @@ static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp, if (is_kfence_address(objp)) { kmemleak_free_recursive(objp, cachep->flags); + memcg_slab_free_hook(cachep, &objp, 1); __kfence_free(objp); return; } diff --git a/mm/swap.c b/mm/swap.c index bcf3ac288b56d..e65e7520bebf9 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -344,7 +344,7 @@ static bool need_activate_page_drain(int cpu) return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; } -static void folio_activate(struct folio *folio) +void folio_activate(struct folio *folio) { if (folio_test_lru(folio) && !folio_test_active(folio) && !folio_test_unevictable(folio)) { @@ -364,7 +364,7 @@ static inline void activate_page_drain(int cpu) { } -static void folio_activate(struct folio *folio) +void folio_activate(struct folio *folio) { struct lruvec *lruvec; @@ -407,6 +407,43 @@ static void __lru_cache_activate_folio(struct folio *folio) local_unlock(&lru_pvecs.lock); } +#ifdef CONFIG_LRU_GEN +static void folio_inc_refs(struct folio *folio) +{ + unsigned long refs; + unsigned long old_flags, new_flags; + + if (folio_test_unevictable(folio)) + return; + + /* see the comment on MAX_NR_TIERS */ + do { + new_flags = old_flags = READ_ONCE(folio->flags); + + if (!(new_flags & BIT(PG_referenced))) { + new_flags |= BIT(PG_referenced); + continue; + } + + if (!(new_flags & BIT(PG_workingset))) { + new_flags |= BIT(PG_workingset); + continue; + } + + refs = new_flags & LRU_REFS_MASK; + refs = min(refs + BIT(LRU_REFS_PGOFF), LRU_REFS_MASK); + + new_flags &= ~LRU_REFS_MASK; + new_flags |= refs; + } while (new_flags != old_flags && + cmpxchg(&folio->flags, old_flags, new_flags) != old_flags); +} +#else +static void folio_inc_refs(struct folio *folio) +{ +} +#endif /* CONFIG_LRU_GEN */ + /* * Mark a page as having seen activity. 
* @@ -419,6 +456,11 @@ static void __lru_cache_activate_folio(struct folio *folio) */ void folio_mark_accessed(struct folio *folio) { + if (lru_gen_enabled()) { + folio_inc_refs(folio); + return; + } + if (!folio_test_referenced(folio)) { folio_set_referenced(folio); } else if (folio_test_unevictable(folio)) { @@ -462,6 +504,11 @@ void folio_add_lru(struct folio *folio) VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); + /* see the comment in lru_gen_add_folio() */ + if (lru_gen_enabled() && !folio_test_unevictable(folio) && + lru_gen_in_fault() && !(current->flags & PF_MEMALLOC)) + folio_set_active(folio); + folio_get(folio); local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.lru_add); @@ -563,7 +610,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec) static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec) { - if (PageActive(page) && !PageUnevictable(page)) { + if (!PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) { int nr_pages = thp_nr_pages(page); del_page_from_lru_list(page, lruvec); @@ -677,7 +724,7 @@ void deactivate_file_page(struct page *page) */ void deactivate_page(struct page *page) { - if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { + if (PageLRU(page) && !PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) { struct pagevec *pvec; local_lock(&lru_pvecs.lock); diff --git a/mm/usercopy.c b/mm/usercopy.c index d0d268135d96d..21fd84ee7fcd4 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -295,7 +295,10 @@ static bool enable_checks __initdata = true; static int __init parse_hardened_usercopy(char *str) { - return strtobool(str, &enable_checks); + if (strtobool(str, &enable_checks)) + pr_warn("Invalid option string for hardened_usercopy: '%s'\n", + str); + return 1; } __setup("hardened_usercopy=", parse_hardened_usercopy); diff --git a/mm/vmscan.c b/mm/vmscan.c index 59b14e0d696c9..8f8f9ac2cd2c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -50,6 +50,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -125,6 +129,13 @@ struct scan_control { /* Always discard instead of demoting to lower tier memory */ unsigned int no_demotion:1; +#ifdef CONFIG_LRU_GEN + /* help make better choices when multiple memcgs are available */ + unsigned int memcgs_need_aging:1; + unsigned int memcgs_need_swapping:1; + unsigned int memcgs_avoid_swapping:1; +#endif + /* Allocation order */ s8 order; @@ -1287,9 +1298,11 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; - mem_cgroup_swapout(page, swap); + + /* get a shadow entry before mem_cgroup_swapout() clears folio_memcg() */ if (reclaimed && !mapping_exiting(mapping)) shadow = workingset_eviction(page, target_memcg); + mem_cgroup_swapout(page, swap); __delete_from_swap_cache(page, swap, shadow); xa_unlock_irq(&mapping->i_pages); put_swap_page(page, swap); @@ -1556,6 +1569,11 @@ static unsigned int shrink_page_list(struct list_head *page_list, if (!sc->may_unmap && page_mapped(page)) goto keep_locked; + /* folio_update_gen() tried to promote this page? 
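+ * If so, a page table walk found a young PTE after this page was
+ * isolated, i.e. the page is hot, so keep it.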
*/ + if (lru_gen_enabled() && !ignore_references && + page_mapped(page) && PageReferenced(page)) + goto keep_locked; + may_enter_fs = (sc->gfp_mask & __GFP_FS) || (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); @@ -2718,6 +2736,112 @@ enum scan_balance { SCAN_FILE, }; +static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc) +{ + unsigned long file; + struct lruvec *target_lruvec; + + if (lru_gen_enabled()) + return; + + target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); + + /* + * Flush the memory cgroup stats, so that we read accurate per-memcg + * lruvec stats for heuristics. + */ + mem_cgroup_flush_stats(); + + /* + * Determine the scan balance between anon and file LRUs. + */ + spin_lock_irq(&target_lruvec->lru_lock); + sc->anon_cost = target_lruvec->anon_cost; + sc->file_cost = target_lruvec->file_cost; + spin_unlock_irq(&target_lruvec->lru_lock); + + /* + * Target desirable inactive:active list ratios for the anon + * and file LRU lists. + */ + if (!sc->force_deactivate) { + unsigned long refaults; + + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_ANON); + if (refaults != target_lruvec->refaults[0] || + inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) + sc->may_deactivate |= DEACTIVATE_ANON; + else + sc->may_deactivate &= ~DEACTIVATE_ANON; + + /* + * When refaults are being observed, it means a new + * workingset is being established. Deactivate to get + * rid of any stale active pages quickly. + */ + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_FILE); + if (refaults != target_lruvec->refaults[1] || + inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) + sc->may_deactivate |= DEACTIVATE_FILE; + else + sc->may_deactivate &= ~DEACTIVATE_FILE; + } else + sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; + + /* + * If we have plenty of inactive file pages that aren't + * thrashing, try to reclaim those first before touching + * anonymous pages. + */ + file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); + if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) + sc->cache_trim_mode = 1; + else + sc->cache_trim_mode = 0; + + /* + * Prevent the reclaimer from falling into the cache trap: as + * cache pages start out inactive, every cache fault will tip + * the scan balance towards the file LRU. And as the file LRU + * shrinks, so does the window for rotation from references. + * This means we have a runaway feedback loop where a tiny + * thrashing file LRU becomes infinitely more attractive than + * anon pages. Try to detect this based on file LRU size. + */ + if (!cgroup_reclaim(sc)) { + unsigned long total_high_wmark = 0; + unsigned long free, anon; + int z; + + free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); + file = node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE); + + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = &pgdat->node_zones[z]; + + if (!managed_zone(zone)) + continue; + + total_high_wmark += high_wmark_pages(zone); + } + + /* + * Consider anon: if that's low too, this isn't a + * runaway file reclaim problem, but rather just + * extreme pressure. Reclaim as per usual then. + */ + anon = node_page_state(pgdat, NR_INACTIVE_ANON); + + sc->file_is_tiny = + file + free <= total_high_wmark && + !(sc->may_deactivate & DEACTIVATE_ANON) && + anon >> sc->priority; + } +} + /* * Determine how aggressively the anon and file LRU lists should be * scanned. 
The relative value of each set of LRU lists is determined @@ -2939,155 +3063,2750 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, return can_demote(pgdat->node_id, sc); } -static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) -{ - unsigned long nr[NR_LRU_LISTS]; - unsigned long targets[NR_LRU_LISTS]; - unsigned long nr_to_scan; - enum lru_list lru; - unsigned long nr_reclaimed = 0; - unsigned long nr_to_reclaim = sc->nr_to_reclaim; - struct blk_plug plug; - bool scan_adjusted; +#ifdef CONFIG_LRU_GEN - get_scan_count(lruvec, sc, nr); +#ifdef CONFIG_LRU_GEN_ENABLED +DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS); +#else +DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS); +#endif - /* Record the original scan target for proportional adjustments later */ - memcpy(targets, nr, sizeof(nr)); +/****************************************************************************** + * shorthand helpers + ******************************************************************************/ - /* - * Global reclaiming within direct reclaim at DEF_PRIORITY is a normal - * event that can occur when there is little memory pressure e.g. - * multiple streaming readers/writers. Hence, we do not abort scanning - * when the requested number of pages are reclaimed when scanning at - * DEF_PRIORITY on the assumption that the fact we are direct - * reclaiming implies that kswapd is not keeping up and it is best to - * do a batch of work at once. For memcg reclaim one check is made to - * abort proportional reclaim if either the file or anon lru has already - * dropped to zero at the first pass. - */ - scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && - sc->priority == DEF_PRIORITY); +#define DEFINE_MAX_SEQ(lruvec) \ + unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq) - blk_start_plug(&plug); - while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || - nr[LRU_INACTIVE_FILE]) { - unsigned long nr_anon, nr_file, percentage; - unsigned long nr_scanned; +#define DEFINE_MIN_SEQ(lruvec) \ + unsigned long min_seq[ANON_AND_FILE] = { \ + READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_ANON]), \ + READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]), \ + } - for_each_evictable_lru(lru) { - if (nr[lru]) { - nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); - nr[lru] -= nr_to_scan; +#define for_each_gen_type_zone(gen, type, zone) \ + for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \ + for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \ + for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++) - nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, sc); - } - } +static int folio_lru_gen(struct folio *folio) +{ + unsigned long flags = READ_ONCE(folio->flags); - cond_resched(); + return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; +} - if (nr_reclaimed < nr_to_reclaim || scan_adjusted) - continue; +static int folio_lru_tier(struct folio *folio) +{ + int refs; + unsigned long flags = READ_ONCE(folio->flags); - /* - * For kswapd and memcg, reclaim at least the number of pages - * requested. Ensure that the anon and file LRUs are scanned - * proportionally what was requested by get_scan_count(). We - * stop reclaiming one LRU and reduce the amount scanning - * proportional to the original scan target. - */ - nr_file = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE]; - nr_anon = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON]; + refs = (flags & LRU_REFS_FLAGS) == LRU_REFS_FLAGS ? 
+ ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1 : 0; - /* - * It's just vindictive to attack the larger once the smaller - * has gone to zero. And given the way we stop scanning the - * smaller below, this makes sure that we only make one nudge - * towards proportionality once we've got nr_to_reclaim. - */ - if (!nr_file || !nr_anon) - break; + return lru_tier_from_refs(refs); +} - if (nr_file > nr_anon) { - unsigned long scan_target = targets[LRU_INACTIVE_ANON] + - targets[LRU_ACTIVE_ANON] + 1; - lru = LRU_BASE; - percentage = nr_anon * 100 / scan_target; - } else { - unsigned long scan_target = targets[LRU_INACTIVE_FILE] + - targets[LRU_ACTIVE_FILE] + 1; - lru = LRU_FILE; - percentage = nr_file * 100 / scan_target; - } +static bool get_cap(int cap) +{ +#ifdef CONFIG_LRU_GEN_ENABLED + return static_branch_likely(&lru_gen_caps[cap]); +#else + return static_branch_unlikely(&lru_gen_caps[cap]); +#endif +} - /* Stop scanning the smaller of the LRU */ - nr[lru] = 0; - nr[lru + LRU_ACTIVE] = 0; +static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid) +{ + struct pglist_data *pgdat = NODE_DATA(nid); - /* - * Recalculate the other LRU scan count based on its original - * scan target and the percentage scanning already complete - */ - lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE; - nr_scanned = targets[lru] - nr[lru]; - nr[lru] = targets[lru] * (100 - percentage) / 100; - nr[lru] -= min(nr[lru], nr_scanned); +#ifdef CONFIG_MEMCG + if (memcg) { + struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec; - lru += LRU_ACTIVE; - nr_scanned = targets[lru] - nr[lru]; - nr[lru] = targets[lru] * (100 - percentage) / 100; - nr[lru] -= min(nr[lru], nr_scanned); + /* for hotadd_new_pgdat() */ + if (!lruvec->pgdat) + lruvec->pgdat = pgdat; - scan_adjusted = true; + return lruvec; } - blk_finish_plug(&plug); - sc->nr_reclaimed += nr_reclaimed; - - /* - * Even if we did not try to evict anon pages at all, we want to - * rebalance the anon lru active/inactive ratio. - */ - if (can_age_anon_pages(lruvec_pgdat(lruvec), sc) && - inactive_is_low(lruvec, LRU_INACTIVE_ANON)) - shrink_active_list(SWAP_CLUSTER_MAX, lruvec, - sc, LRU_ACTIVE_ANON); +#endif + return pgdat ? &pgdat->__lruvec : NULL; } -/* Use reclaim/compaction for costly allocs or under memory pressure */ -static bool in_reclaim_compaction(struct scan_control *sc) +static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc) { - if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && - (sc->order > PAGE_ALLOC_COSTLY_ORDER || - sc->priority < DEF_PRIORITY - 2)) - return true; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); - return false; + if (!can_demote(pgdat->node_id, sc) && + mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH) + return 0; + + return mem_cgroup_swappiness(memcg); } -/* - * Reclaim/compaction is used for high-order allocation requests. It reclaims - * order-0 pages before compacting the zone. should_continue_reclaim() returns - * true if more pages should be reclaimed such that when the page allocator - * calls try_to_compact_pages() that it will have enough free pages to succeed. - * It will give up earlier than that if there is difficulty reclaiming pages. 
- */ -static inline bool should_continue_reclaim(struct pglist_data *pgdat, - unsigned long nr_reclaimed, - struct scan_control *sc) +static int get_nr_gens(struct lruvec *lruvec, int type) { - unsigned long pages_for_compaction; - unsigned long inactive_lru_pages; - int z; + return lruvec->lrugen.max_seq - lruvec->lrugen.min_seq[type] + 1; +} - /* If not in reclaim/compaction mode, stop */ - if (!in_reclaim_compaction(sc)) - return false; +static bool __maybe_unused seq_is_valid(struct lruvec *lruvec) +{ + /* see the comment on lru_gen_struct */ + return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS && + get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) && + get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS; +} - /* - * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX - * number of pages that were scanned. This will return to the caller - * with the risk reclaim/compaction and the resulting allocation attempt +/****************************************************************************** + * mm_struct list + ******************************************************************************/ + +static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg) +{ + static struct lru_gen_mm_list mm_list = { + .fifo = LIST_HEAD_INIT(mm_list.fifo), + .lock = __SPIN_LOCK_UNLOCKED(mm_list.lock), + }; + +#ifdef CONFIG_MEMCG + if (memcg) + return &memcg->mm_list; +#endif + return &mm_list; +} + +void lru_gen_add_mm(struct mm_struct *mm) +{ + int nid; + struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + + VM_BUG_ON_MM(!list_empty(&mm->lru_gen.list), mm); +#ifdef CONFIG_MEMCG + VM_BUG_ON_MM(mm->lru_gen.memcg, mm); + mm->lru_gen.memcg = memcg; +#endif + spin_lock(&mm_list->lock); + + for_each_node_state(nid, N_MEMORY) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + + if (!lruvec) + continue; + + if (lruvec->mm_state.tail == &mm_list->fifo) + lruvec->mm_state.tail = &mm->lru_gen.list; + } + + list_add_tail(&mm->lru_gen.list, &mm_list->fifo); + + spin_unlock(&mm_list->lock); +} + +void lru_gen_del_mm(struct mm_struct *mm) +{ + int nid; + struct lru_gen_mm_list *mm_list; + struct mem_cgroup *memcg = NULL; + + if (list_empty(&mm->lru_gen.list)) + return; + +#ifdef CONFIG_MEMCG + memcg = mm->lru_gen.memcg; +#endif + mm_list = get_mm_list(memcg); + + spin_lock(&mm_list->lock); + + for_each_node(nid) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + + if (!lruvec) + continue; + + if (lruvec->mm_state.tail == &mm->lru_gen.list) + lruvec->mm_state.tail = lruvec->mm_state.tail->next; + + if (lruvec->mm_state.head != &mm->lru_gen.list) + continue; + + lruvec->mm_state.head = lruvec->mm_state.head->next; + if (lruvec->mm_state.head == &mm_list->fifo) + WRITE_ONCE(lruvec->mm_state.seq, lruvec->mm_state.seq + 1); + } + + list_del_init(&mm->lru_gen.list); + + spin_unlock(&mm_list->lock); + +#ifdef CONFIG_MEMCG + mem_cgroup_put(mm->lru_gen.memcg); + mm->lru_gen.memcg = NULL; +#endif +} + +#ifdef CONFIG_MEMCG +void lru_gen_migrate_mm(struct mm_struct *mm) +{ + struct mem_cgroup *memcg; + + lockdep_assert_held(&mm->owner->alloc_lock); + + /* for mm_update_next_owner() */ + if (mem_cgroup_disabled()) + return; + + rcu_read_lock(); + memcg = mem_cgroup_from_task(mm->owner); + rcu_read_unlock(); + if (memcg == mm->lru_gen.memcg) + return; + + VM_BUG_ON_MM(!mm->lru_gen.memcg, mm); + VM_BUG_ON_MM(list_empty(&mm->lru_gen.list), mm); + + lru_gen_del_mm(mm); + lru_gen_add_mm(mm); +} +#endif + +/* + * Bloom 
filters with m=1<<15, k=2 and false positive rates of ~1/5 when + * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of + * bits in a bitmap, k is the number of hash functions and n is the number of + * inserted items. + * + * Page table walkers use one of the two filters to reduce their search space. + * To get rid of non-leaf entries that no longer have enough leaf entries, the + * aging uses the double-buffering technique to flip to the other filter each + * time it produces a new generation. For non-leaf entries that have enough + * leaf entries, the aging carries them over to the next generation in + * walk_pmd_range(); the eviction also reports them when walking the rmap + * in lru_gen_look_around(). + * + * For future optimizations: + * 1. It's not necessary to keep both filters all the time. The spare one can be + * freed after the RCU grace period and reallocated if needed again. + * 2. When reallocating, it's worth scaling its size according to the number + * of inserted entries in the other filter, to reduce the memory overhead on + * small systems and false positives on large systems. + * 3. Jenkins' hash function is an alternative to Knuth's. + */ +#define BLOOM_FILTER_SHIFT 15 +
+static inline int filter_gen_from_seq(unsigned long seq) +{ + return seq % NR_BLOOM_FILTERS; +} +
+static void get_item_key(void *item, int *key) +{ + u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2); + + BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32)); + + key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1); + key[1] = hash >> BLOOM_FILTER_SHIFT; +} +
+static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq) +{ + unsigned long *filter; + int gen = filter_gen_from_seq(seq); + + lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock); + + filter = lruvec->mm_state.filters[gen]; + if (filter) { + bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT)); + return; + } + + filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT), GFP_ATOMIC); + WRITE_ONCE(lruvec->mm_state.filters[gen], filter); +} +
+static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) +{ + int key[2]; + unsigned long *filter; + int gen = filter_gen_from_seq(seq); + + filter = READ_ONCE(lruvec->mm_state.filters[gen]); + if (!filter) + return; + + get_item_key(item, key); + + if (!test_bit(key[0], filter)) + set_bit(key[0], filter); + if (!test_bit(key[1], filter)) + set_bit(key[1], filter); +} +
+static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) +{ + int key[2]; + unsigned long *filter; + int gen = filter_gen_from_seq(seq); + + filter = READ_ONCE(lruvec->mm_state.filters[gen]); + if (!filter) + return true; + + get_item_key(item, key); + + return test_bit(key[0], filter) && test_bit(key[1], filter); +} +
+static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last) +{ + int i; + int hist; + + lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock); + + if (walk) { + hist = lru_hist_from_seq(walk->max_seq); + + for (i = 0; i < NR_MM_STATS; i++) { + WRITE_ONCE(lruvec->mm_state.stats[hist][i], + lruvec->mm_state.stats[hist][i] + walk->mm_stats[i]); + walk->mm_stats[i] = 0; + } + } + + if (NR_HIST_GENS > 1 && last) { + hist = lru_hist_from_seq(lruvec->mm_state.seq + 1); + + for (i = 0; i < NR_MM_STATS; i++) + WRITE_ONCE(lruvec->mm_state.stats[hist][i], 0); + } +} +
+static bool should_skip_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk) +{ + int type; + unsigned long size = 0; + struct
pglist_data *pgdat = lruvec_pgdat(walk->lruvec); + + if (!walk->full_scan && cpumask_empty(mm_cpumask(mm)) && + !node_isset(pgdat->node_id, mm->lru_gen.nodes)) + return true; + + node_clear(pgdat->node_id, mm->lru_gen.nodes); + + for (type = !walk->can_swap; type < ANON_AND_FILE; type++) { + size += type ? get_mm_counter(mm, MM_FILEPAGES) : + get_mm_counter(mm, MM_ANONPAGES) + + get_mm_counter(mm, MM_SHMEMPAGES); + } + + if (size < MIN_LRU_BATCH) + return true; + + if (mm_is_oom_victim(mm)) + return true; + + return !mmget_not_zero(mm); +} + +static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, + struct mm_struct **iter) +{ + bool first = false; + bool last = true; + struct mm_struct *mm = NULL; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + struct lru_gen_mm_state *mm_state = &lruvec->mm_state; + + /* + * There are four interesting cases for this page table walker: + * 1. It tries to start a new iteration of mm_list with a stale max_seq; + * there is nothing to be done. + * 2. It's the first of the current generation, and it needs to reset + * the Bloom filter for the next generation. + * 3. It reaches the end of mm_list, and it needs to increment + * mm_state->seq; the iteration is done. + * 4. It's the last of the current generation, and it needs to reset the + * mm stats counters for the next generation. + */ + if (*iter) + mmput_async(*iter); + else if (walk->max_seq <= READ_ONCE(mm_state->seq)) + return false; + + spin_lock(&mm_list->lock); + + VM_BUG_ON(mm_state->seq + 1 < walk->max_seq); + VM_BUG_ON(*iter && mm_state->seq > walk->max_seq); + VM_BUG_ON(*iter && !mm_state->nr_walkers); + + if (walk->max_seq <= mm_state->seq) { + if (!*iter) + last = false; + goto done; + } + + if (!mm_state->nr_walkers) { + VM_BUG_ON(mm_state->head && mm_state->head != &mm_list->fifo); + + mm_state->head = mm_list->fifo.next; + first = true; + } + + while (!mm && mm_state->head != &mm_list->fifo) { + mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); + + mm_state->head = mm_state->head->next; + + /* full scan for those added after the last iteration */ + if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) { + mm_state->tail = mm_state->head; + walk->full_scan = true; + } + + if (should_skip_mm(mm, walk)) + mm = NULL; + } + + if (mm_state->head == &mm_list->fifo) + WRITE_ONCE(mm_state->seq, mm_state->seq + 1); +done: + if (*iter && !mm) + mm_state->nr_walkers--; + if (!*iter && mm) + mm_state->nr_walkers++; + + if (mm_state->nr_walkers) + last = false; + + if (mm && first) + reset_bloom_filter(lruvec, walk->max_seq + 1); + + if (*iter || last) + reset_mm_stats(lruvec, walk, last); + + spin_unlock(&mm_list->lock); + + *iter = mm; + + return last; +} + +static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) +{ + bool success = false; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + struct lru_gen_mm_state *mm_state = &lruvec->mm_state; + + if (max_seq <= READ_ONCE(mm_state->seq)) + return false; + + spin_lock(&mm_list->lock); + + VM_BUG_ON(mm_state->seq + 1 < max_seq); + + if (max_seq > mm_state->seq && !mm_state->nr_walkers) { + VM_BUG_ON(mm_state->head && mm_state->head != &mm_list->fifo); + + WRITE_ONCE(mm_state->seq, mm_state->seq + 1); + reset_mm_stats(lruvec, NULL, true); + success = true; + } + + spin_unlock(&mm_list->lock); + + return success; +} + 
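The double-buffered Bloom filter that the walkers above reset, update and test is easy to prototype outside the kernel. The stand-alone sketch below (user-space C; the multiplicative hash and the fixed-size bitmaps are illustrative stand-ins for the kernel's hash_ptr() and bitmap_zalloc()) shows the same idea: updates and tests go to the filter selected by generation parity, and the aging clears the spare filter before a new generation starts populating it.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define SHIFT	15
	#define NBITS	(1UL << SHIFT)
	#define BPW	(8 * sizeof(unsigned long))

	/* two filters, selected by generation parity (NR_BLOOM_FILTERS == 2) */
	static unsigned long filters[2][NBITS / BPW];

	static int filter_gen(unsigned long seq)
	{
		return seq % 2;
	}

	static void item_keys(const void *item, uint32_t key[2])
	{
		/* illustrative hash; the kernel derives both keys from hash_ptr() */
		uint64_t hash = (uintptr_t)item * 0x9e3779b97f4a7c15ull;

		key[0] = (hash >> 32) & (NBITS - 1);
		key[1] = (hash >> (32 + SHIFT)) & (NBITS - 1);
	}

	static void update_filter(unsigned long seq, const void *item)
	{
		unsigned long *filter = filters[filter_gen(seq)];
		uint32_t key[2];

		item_keys(item, key);
		filter[key[0] / BPW] |= 1UL << (key[0] % BPW);
		filter[key[1] / BPW] |= 1UL << (key[1] % BPW);
	}

	static bool test_filter(unsigned long seq, const void *item)
	{
		const unsigned long *filter = filters[filter_gen(seq)];
		uint32_t key[2];

		item_keys(item, key);
		return ((filter[key[0] / BPW] >> (key[0] % BPW)) & 1) &&
		       ((filter[key[1] / BPW] >> (key[1] % BPW)) & 1);
	}

	/* the aging clears the filter it is about to reuse for a new generation */
	static void reset_filter(unsigned long seq)
	{
		memset(filters[filter_gen(seq)], 0, sizeof(filters[0]));
	}

	int main(void)
	{
		int x;

		update_filter(3, &x);	/* record &x for generation 3 */
		printf("%d %d\n", test_filter(3, &x), test_filter(4, &x));	/* 1 0 */

		reset_filter(5);	/* generation 5 recycles generation 3's filter */
		printf("%d\n", test_filter(3, &x));	/* 0: stale entries aged out */
		return 0;
	}

The kernel version differs mainly in that the filters are per-lruvec, allocated lazily with bitmap_zalloc(), and reset under the mm_list lock, as in reset_bloom_filter() above; the consequence is the same: an entry recorded for generation N ages out of the search space unless walk_pmd_range() carries it over to generation N+1.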
+/****************************************************************************** + * refault feedback loop + ******************************************************************************/ +
+/* + * A feedback loop based on a Proportional-Integral-Derivative (PID) controller. + * + * The P term is refaulted/(evicted+protected) from a tier in the generation + * currently being evicted; the I term is the exponential moving average of the + * P term over the generations previously evicted, using the smoothing factor + * 1/2; the D term isn't supported. + * + * The setpoint (SP) is always the first tier of one type; the process variable + * (PV) is either any tier of the other type or any other tier of the same + * type. + * + * The error is the difference between the SP and the PV; the correction is to + * turn off protection when SP>PV or turn on protection when SP<PV. + */ +
+struct ctrl_pos { + unsigned long refaulted; + unsigned long total; + int gain; +}; +
+static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain, + struct ctrl_pos *pos) +{ + struct lru_gen_struct *lrugen = &lruvec->lrugen; + int hist = lru_hist_from_seq(lrugen->min_seq[type]); + + pos->refaulted = lrugen->avg_refaulted[type][tier] + + atomic_long_read(&lrugen->refaulted[hist][type][tier]); + pos->total = lrugen->avg_total[type][tier] + + atomic_long_read(&lrugen->evicted[hist][type][tier]); + if (tier) + pos->total += lrugen->protected[hist][type][tier - 1]; + pos->gain = gain; +} +
+static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover) +{ + int hist, tier; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1; + unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1; + + lockdep_assert_held(&lruvec->lru_lock); + + if (!carryover && !clear) + return; + + hist = lru_hist_from_seq(seq); + + for (tier = 0; tier < MAX_NR_TIERS; tier++) { + if (carryover) { + unsigned long sum; + + sum = lrugen->avg_refaulted[type][tier] + + atomic_long_read(&lrugen->refaulted[hist][type][tier]); + WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2); + + sum = lrugen->avg_total[type][tier] + + atomic_long_read(&lrugen->evicted[hist][type][tier]); + if (tier) + sum += lrugen->protected[hist][type][tier - 1]; + WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2); + } + + if (clear) { + atomic_long_set(&lrugen->refaulted[hist][type][tier], 0); + atomic_long_set(&lrugen->evicted[hist][type][tier], 0); + if (tier) + WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0); + } + } +} +
+static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv) +{ + /* + * Return true if the PV has a limited number of refaults or a lower + * refaulted/total than the SP.
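+ *
+ * A made-up example, assuming MIN_LRU_BATCH is 64: with
+ * sp = { .refaulted = 20, .total = 100, .gain = 1 } and
+ * pv = { .refaulted = 70, .total = 100, .gain = 1 }, the first clause
+ * below is false (70 >= 64) and 70*(100+64) > (20+1)*100, so the
+ * function returns false and the PV tier keeps its protection.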
+ */ + return pv->refaulted < MIN_LRU_BATCH || + pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <= + (sp->refaulted + 1) * pv->total * pv->gain; +} + +/****************************************************************************** + * the aging + ******************************************************************************/ + +static int folio_update_gen(struct folio *folio, int gen) +{ + unsigned long old_flags, new_flags; + + VM_BUG_ON(gen >= MAX_NR_GENS); + VM_BUG_ON(!rcu_read_lock_held()); + + do { + new_flags = old_flags = READ_ONCE(folio->flags); + + /* for shrink_page_list() */ + if (!(new_flags & LRU_GEN_MASK)) { + new_flags |= BIT(PG_referenced); + continue; + } + + new_flags &= ~LRU_GEN_MASK; + new_flags |= (gen + 1UL) << LRU_GEN_PGOFF; + new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS); + } while (new_flags != old_flags && + cmpxchg(&folio->flags, old_flags, new_flags) != old_flags); + + return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; +} + +static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming) +{ + unsigned long old_flags, new_flags; + int type = folio_is_file_lru(folio); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); + + do { + new_flags = old_flags = READ_ONCE(folio->flags); + VM_BUG_ON_FOLIO(!(new_flags & LRU_GEN_MASK), folio); + + new_gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; + /* folio_update_gen() has promoted this page? */ + if (new_gen >= 0 && new_gen != old_gen) + return new_gen; + + new_gen = (old_gen + 1) % MAX_NR_GENS; + + new_flags &= ~LRU_GEN_MASK; + new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF; + new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS); + /* for folio_end_writeback() */ + if (reclaiming) + new_flags |= BIT(PG_reclaim); + } while (cmpxchg(&folio->flags, old_flags, new_flags) != old_flags); + + lru_gen_update_size(lruvec, folio, old_gen, new_gen); + + return new_gen; +} + +static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio, + int old_gen, int new_gen) +{ + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + int delta = folio_nr_pages(folio); + + VM_BUG_ON(old_gen >= MAX_NR_GENS); + VM_BUG_ON(new_gen >= MAX_NR_GENS); + + walk->batched++; + + walk->nr_pages[old_gen][type][zone] -= delta; + walk->nr_pages[new_gen][type][zone] += delta; +} + +static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk) +{ + int gen, type, zone; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + walk->batched = 0; + + for_each_gen_type_zone(gen, type, zone) { + enum lru_list lru = type * LRU_INACTIVE_FILE; + int delta = walk->nr_pages[gen][type][zone]; + + if (!delta) + continue; + + walk->nr_pages[gen][type][zone] = 0; + WRITE_ONCE(lrugen->nr_pages[gen][type][zone], + lrugen->nr_pages[gen][type][zone] + delta); + + if (lru_gen_is_active(lruvec, gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, delta); + } +} + +static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *walk) +{ + struct address_space *mapping; + struct vm_area_struct *vma = walk->vma; + struct lru_gen_mm_walk *priv = walk->private; + + if (!vma_is_accessible(vma) || is_vm_hugetlb_page(vma) || + (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ)) || + vma == get_gate_vma(vma->vm_mm)) + return true; + + if (vma_is_anonymous(vma)) + return !priv->can_swap; + + if (WARN_ON_ONCE(!vma->vm_file || !vma->vm_file->f_mapping)) + return true; + + mapping = 
vma->vm_file->f_mapping; + if (mapping_unevictable(mapping)) + return true; + + /* check readpage to exclude special mappings like dax, etc. */ + return shmem_mapping(mapping) ? !priv->can_swap : !mapping->a_ops->readpage; +} + +/* + * Some userspace memory allocators map many single-page VMAs. Instead of + * returning back to the PGD table for each of such VMAs, finish an entire PMD + * table to reduce zigzags and improve cache performance. + */ +static bool get_next_vma(struct mm_walk *walk, unsigned long mask, unsigned long size, + unsigned long *start, unsigned long *end) +{ + unsigned long next = round_up(*end, size); + + VM_BUG_ON(mask & size); + VM_BUG_ON(*start >= *end); + VM_BUG_ON((next & mask) != (*start & mask)); + + while (walk->vma) { + if (next >= walk->vma->vm_end) { + walk->vma = walk->vma->vm_next; + continue; + } + + if ((next & mask) != (walk->vma->vm_start & mask)) + return false; + + if (should_skip_vma(walk->vma->vm_start, walk->vma->vm_end, walk)) { + walk->vma = walk->vma->vm_next; + continue; + } + + *start = max(next, walk->vma->vm_start); + next = (next | ~mask) + 1; + /* rounded-up boundaries can wrap to 0 */ + *end = next && next < walk->vma->vm_end ? next : walk->vma->vm_end; + + return true; + } + + return false; +} + +static bool suitable_to_scan(int total, int young) +{ + int n = clamp_t(int, cache_line_size() / sizeof(pte_t), 2, 8); + + /* suitable if the average number of young PTEs per cacheline is >=1 */ + return young * n >= total; +} + +static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + int i; + pte_t *pte; + spinlock_t *ptl; + unsigned long addr; + int total = 0; + int young = 0; + struct lru_gen_mm_walk *priv = walk->private; + struct mem_cgroup *memcg = lruvec_memcg(priv->lruvec); + struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec); + int old_gen, new_gen = lru_gen_from_seq(priv->max_seq); + + VM_BUG_ON(pmd_leaf(*pmd)); + + pte = pte_offset_map_lock(walk->mm, pmd, start & PMD_MASK, &ptl); + arch_enter_lazy_mmu_mode(); +restart: + for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) { + struct folio *folio; + unsigned long pfn = pte_pfn(pte[i]); + + VM_BUG_ON(addr < walk->vma->vm_start || addr >= walk->vma->vm_end); + + total++; + priv->mm_stats[MM_PTE_TOTAL]++; + + if (!pte_present(pte[i]) || is_zero_pfn(pfn)) + continue; + + if (WARN_ON_ONCE(pte_devmap(pte[i]) || pte_special(pte[i]))) + continue; + + if (!pte_young(pte[i])) { + priv->mm_stats[MM_PTE_OLD]++; + continue; + } + + VM_BUG_ON(!pfn_valid(pfn)); + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + continue; + + folio = pfn_folio(pfn); + if (folio_nid(folio) != pgdat->node_id) + continue; + + if (folio_memcg_rcu(folio) != memcg) + continue; + + if (!ptep_test_and_clear_young(walk->vma, addr, pte + i)) + continue; + + young++; + priv->mm_stats[MM_PTE_YOUNG]++; + + if (pte_dirty(pte[i]) && !folio_test_dirty(folio) && + !(folio_test_anon(folio) && folio_test_swapbacked(folio) && + !folio_test_swapcache(folio))) + folio_mark_dirty(folio); + + old_gen = folio_update_gen(folio, new_gen); + if (old_gen >= 0 && old_gen != new_gen) + update_batch_size(priv, folio, old_gen, new_gen); + } + + if (i < PTRS_PER_PTE && get_next_vma(walk, PMD_MASK, PAGE_SIZE, &start, &end)) + goto restart; + + arch_leave_lazy_mmu_mode(); + pte_unmap_unlock(pte, ptl); + + return suitable_to_scan(total, young); +} + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) +static void 
walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma, + struct mm_walk *walk, unsigned long *start) +{ + int i; + pmd_t *pmd; + spinlock_t *ptl; + struct lru_gen_mm_walk *priv = walk->private; + struct mem_cgroup *memcg = lruvec_memcg(priv->lruvec); + struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec); + int old_gen, new_gen = lru_gen_from_seq(priv->max_seq); + + VM_BUG_ON(pud_leaf(*pud)); + + /* try to batch at most 1+MIN_LRU_BATCH+1 entries */ + if (*start == -1) { + *start = next; + return; + } + + i = next == -1 ? 0 : pmd_index(next) - pmd_index(*start); + if (i && i <= MIN_LRU_BATCH) { + __set_bit(i - 1, priv->bitmap); + return; + } + + pmd = pmd_offset(pud, *start); + ptl = pmd_lock(walk->mm, pmd); + arch_enter_lazy_mmu_mode(); + + do { + struct folio *folio; + unsigned long pfn = pmd_pfn(pmd[i]); + unsigned long addr = i ? (*start & PMD_MASK) + i * PMD_SIZE : *start; + + VM_BUG_ON(addr < vma->vm_start || addr >= vma->vm_end); + + if (!pmd_present(pmd[i]) || is_huge_zero_pmd(pmd[i])) + goto next; + + if (WARN_ON_ONCE(pmd_devmap(pmd[i]))) + goto next; + + if (!pmd_trans_huge(pmd[i])) { + if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && + get_cap(LRU_GEN_NONLEAF_YOUNG)) + pmdp_test_and_clear_young(vma, addr, pmd + i); + goto next; + } + + VM_BUG_ON(!pfn_valid(pfn)); + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + goto next; + + folio = pfn_folio(pfn); + if (folio_nid(folio) != pgdat->node_id) + goto next; + + if (folio_memcg_rcu(folio) != memcg) + goto next; + + if (!pmdp_test_and_clear_young(vma, addr, pmd + i)) + goto next; + + priv->mm_stats[MM_PTE_YOUNG]++; + + if (pmd_dirty(pmd[i]) && !folio_test_dirty(folio) && + !(folio_test_anon(folio) && folio_test_swapbacked(folio) && + !folio_test_swapcache(folio))) + folio_mark_dirty(folio); + + old_gen = folio_update_gen(folio, new_gen); + if (old_gen >= 0 && old_gen != new_gen) + update_batch_size(priv, folio, old_gen, new_gen); +next: + i = i > MIN_LRU_BATCH ? 0 : + find_next_bit(priv->bitmap, MIN_LRU_BATCH, i) + 1; + } while (i <= MIN_LRU_BATCH); + + arch_leave_lazy_mmu_mode(); + spin_unlock(ptl); + + *start = -1; + bitmap_zero(priv->bitmap, MIN_LRU_BATCH); +} +#else +static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma, + struct mm_walk *walk, unsigned long *start) +{ +} +#endif + +static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + int i; + pmd_t *pmd; + unsigned long next; + unsigned long addr; + struct vm_area_struct *vma; + unsigned long pos = -1; + struct lru_gen_mm_walk *priv = walk->private; + + VM_BUG_ON(pud_leaf(*pud)); + + /* + * Finish an entire PMD in two passes: the first only reaches to PTE + * tables to avoid taking the PMD lock; the second, if necessary, takes + * the PMD lock to clear the accessed bit in PMD entries. 
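+ *
+ * E.g. walk_pmd_range_locked() only records candidate entries in a
+ * bitmap during the first pass; when the batch reaches MIN_LRU_BATCH
+ * entries, or is flushed with next == -1, it takes the PMD lock once
+ * and clears the accessed bit in all batched entries.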
+ */ + pmd = pmd_offset(pud, start & PUD_MASK); +restart: + /* walk_pte_range() may call get_next_vma() */ + vma = walk->vma; + for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) { + pmd_t val = pmd_read_atomic(pmd + i); + + /* for pmd_read_atomic() */ + barrier(); + + next = pmd_addr_end(addr, end); + + if (!pmd_present(val)) { + priv->mm_stats[MM_PTE_TOTAL]++; + continue; + } + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_trans_huge(val)) { + unsigned long pfn = pmd_pfn(val); + struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec); + + priv->mm_stats[MM_PTE_TOTAL]++; + + if (is_huge_zero_pmd(val)) + continue; + + if (!pmd_young(val)) { + priv->mm_stats[MM_PTE_OLD]++; + continue; + } + + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + continue; + + walk_pmd_range_locked(pud, addr, vma, walk, &pos); + continue; + } +#endif + priv->mm_stats[MM_PMD_TOTAL]++; + +#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG + if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { + if (!pmd_young(val)) + continue; + + walk_pmd_range_locked(pud, addr, vma, walk, &pos); + } +#endif + if (!priv->full_scan && !test_bloom_filter(priv->lruvec, priv->max_seq, pmd + i)) + continue; + + priv->mm_stats[MM_PMD_FOUND]++; + + if (!walk_pte_range(&val, addr, next, walk)) + continue; + + priv->mm_stats[MM_PMD_ADDED]++; + + /* carry over to the next generation */ + update_bloom_filter(priv->lruvec, priv->max_seq + 1, pmd + i); + } + + walk_pmd_range_locked(pud, -1, vma, walk, &pos); + + if (i < PTRS_PER_PMD && get_next_vma(walk, PUD_MASK, PMD_SIZE, &start, &end)) + goto restart; +} + +static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + int i; + pud_t *pud; + unsigned long addr; + unsigned long next; + struct lru_gen_mm_walk *priv = walk->private; + + VM_BUG_ON(p4d_leaf(*p4d)); + + pud = pud_offset(p4d, start & P4D_MASK); +restart: + for (i = pud_index(start), addr = start; addr != end; i++, addr = next) { + pud_t val = READ_ONCE(pud[i]); + + next = pud_addr_end(addr, end); + + if (!pud_present(val) || WARN_ON_ONCE(pud_leaf(val))) + continue; + + walk_pmd_range(&val, addr, next, walk); + + if (priv->batched >= MAX_LRU_BATCH) { + end = (addr | ~PUD_MASK) + 1; + goto done; + } + } + + if (i < PTRS_PER_PUD && get_next_vma(walk, P4D_MASK, PUD_SIZE, &start, &end)) + goto restart; + + end = round_up(end, P4D_SIZE); +done: + /* rounded-up boundaries can wrap to 0 */ + priv->next_addr = end && walk->vma ? 
max(end, walk->vma->vm_start) : 0; + + return -EAGAIN; +} + +static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_mm_walk *walk) +{ + static const struct mm_walk_ops mm_walk_ops = { + .test_walk = should_skip_vma, + .p4d_entry = walk_pud_range, + }; + + int err; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + + walk->next_addr = FIRST_USER_ADDRESS; + + do { + err = -EBUSY; + + /* folio_update_gen() requires stable folio_memcg() */ + if (!mem_cgroup_trylock_pages(memcg)) + break; + + /* the caller might be holding the lock for write */ + if (mmap_read_trylock(mm)) { + unsigned long start = walk->next_addr; + unsigned long end = mm->highest_vm_end; + + err = walk_page_range(mm, start, end, &mm_walk_ops, walk); + + mmap_read_unlock(mm); + + if (walk->batched) { + spin_lock_irq(&lruvec->lru_lock); + reset_batch_size(lruvec, walk); + spin_unlock_irq(&lruvec->lru_lock); + } + } + + mem_cgroup_unlock_pages(); + + cond_resched(); + } while (err == -EAGAIN && walk->next_addr && !mm_is_oom_victim(mm)); +} + +static struct lru_gen_mm_walk *alloc_mm_walk(void) +{ + if (current->reclaim_state && current->reclaim_state->mm_walk) + return current->reclaim_state->mm_walk; + + return kzalloc(sizeof(struct lru_gen_mm_walk), + __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); +} + +static void free_mm_walk(struct lru_gen_mm_walk *walk) +{ + if (!current->reclaim_state || !current->reclaim_state->mm_walk) + kfree(walk); +} + +static void inc_min_seq(struct lruvec *lruvec) +{ + int type; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON(!seq_is_valid(lruvec)); + + for (type = 0; type < ANON_AND_FILE; type++) { + if (get_nr_gens(lruvec, type) != MAX_NR_GENS) + continue; + + reset_ctrl_pos(lruvec, type, true); + WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1); + } +} + +static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) +{ + int gen, type, zone; + bool success = false; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + DEFINE_MIN_SEQ(lruvec); + + VM_BUG_ON(!seq_is_valid(lruvec)); + + for (type = !can_swap; type < ANON_AND_FILE; type++) { + while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) { + gen = lru_gen_from_seq(min_seq[type]); + + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + if (!list_empty(&lrugen->lists[gen][type][zone])) + goto next; + } + + min_seq[type]++; + } +next: + ; + } + + /* see the comment on lru_gen_struct */ + if (can_swap) { + min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]); + min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]); + } + + for (type = !can_swap; type < ANON_AND_FILE; type++) { + if (min_seq[type] == lrugen->min_seq[type]) + continue; + + reset_ctrl_pos(lruvec, type, true); + WRITE_ONCE(lrugen->min_seq[type], min_seq[type]); + success = true; + } + + return success; +} + +static void inc_max_seq(struct lruvec *lruvec) +{ + int prev, next; + int type, zone; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + spin_lock_irq(&lruvec->lru_lock); + + VM_BUG_ON(!seq_is_valid(lruvec)); + + inc_min_seq(lruvec); + + /* update the active/inactive LRU sizes for compatibility */ + prev = lru_gen_from_seq(lrugen->max_seq - 1); + next = lru_gen_from_seq(lrugen->max_seq + 1); + + for (type = 0; type < ANON_AND_FILE; type++) { + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + enum lru_list lru = type * LRU_INACTIVE_FILE; + long delta = lrugen->nr_pages[prev][type][zone] - + lrugen->nr_pages[next][type][zone]; + + if (!delta) + continue; + + 
__update_lru_size(lruvec, lru, zone, delta); + __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, -delta); + } + } + + for (type = 0; type < ANON_AND_FILE; type++) + reset_ctrl_pos(lruvec, type, false); + + WRITE_ONCE(lrugen->timestamps[next], jiffies); + /* make sure preceding modifications appear */ + smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1); + + spin_unlock_irq(&lruvec->lru_lock); +} + +static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, + struct scan_control *sc, bool can_swap, bool full_scan) +{ + bool success; + struct lru_gen_mm_walk *walk; + struct mm_struct *mm = NULL; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON(max_seq > READ_ONCE(lrugen->max_seq)); + + /* + * If the hardware doesn't automatically set the accessed bit, fallback + * to lru_gen_look_around(), which only clears the accessed bit in a + * handful of PTEs. Spreading the work out over a period of time usually + * is less efficient, but it avoids bursty page faults. + */ + if (!full_scan && (!arch_has_hw_pte_young() || !get_cap(LRU_GEN_MM_WALK))) { + success = iterate_mm_list_nowalk(lruvec, max_seq); + goto done; + } + + walk = alloc_mm_walk(); + if (!walk) { + success = iterate_mm_list_nowalk(lruvec, max_seq); + goto done; + } + + walk->lruvec = lruvec; + walk->max_seq = max_seq; + walk->can_swap = can_swap; + walk->full_scan = full_scan; + + do { + success = iterate_mm_list(lruvec, walk, &mm); + if (mm) + walk_mm(lruvec, mm, walk); + + cond_resched(); + } while (mm); + + free_mm_walk(walk); +done: + if (!success) { + if (!current_is_kswapd() && !sc->priority) + wait_event_killable(lruvec->mm_state.wait, + max_seq < READ_ONCE(lrugen->max_seq)); + + return max_seq < READ_ONCE(lrugen->max_seq); + } + + VM_BUG_ON(max_seq != READ_ONCE(lrugen->max_seq)); + + inc_max_seq(lruvec); + /* either this sees any waiters or they will see updated max_seq */ + if (wq_has_sleeper(&lruvec->mm_state.wait)) + wake_up_all(&lruvec->mm_state.wait); + + wakeup_flusher_threads(WB_REASON_VMSCAN); + + return true; +} + +static long get_nr_evictable(struct lruvec *lruvec, unsigned long max_seq, + unsigned long *min_seq, bool can_swap, bool *need_aging) +{ + int gen, type, zone; + long old = 0; + long young = 0; + long total = 0; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + for (type = !can_swap; type < ANON_AND_FILE; type++) { + unsigned long seq; + + for (seq = min_seq[type]; seq <= max_seq; seq++) { + long size = 0; + + gen = lru_gen_from_seq(seq); + + for (zone = 0; zone < MAX_NR_ZONES; zone++) + size += READ_ONCE(lrugen->nr_pages[gen][type][zone]); + + total += size; + if (seq == max_seq) + young += size; + if (seq + MIN_NR_GENS == max_seq) + old += size; + } + } + + /* try to spread pages out across MIN_NR_GENS+1 generations */ + if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS > max_seq) + *need_aging = true; + else if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS < max_seq) + *need_aging = false; + else if (young * MIN_NR_GENS > total) + *need_aging = true; + else if (old * (MIN_NR_GENS + 2) < total) + *need_aging = true; + else + *need_aging = false; + + return total > 0 ? 
total : 0; +} + +static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, + unsigned long min_ttl) +{ + bool need_aging; + long nr_to_scan; + int swappiness = get_swappiness(lruvec, sc); + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + if (min_ttl) { + int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); + unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); + + if (time_is_after_jiffies(birth + min_ttl)) + return false; + } + + mem_cgroup_calculate_protection(NULL, memcg); + + if (mem_cgroup_below_min(memcg)) + return false; + + nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, swappiness, &need_aging); + if (!nr_to_scan) + return false; + + nr_to_scan >>= sc->priority; + + if (!mem_cgroup_online(memcg)) + nr_to_scan++; + + if (nr_to_scan && need_aging && (!mem_cgroup_below_low(memcg) || sc->memcg_low_reclaim)) + try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false); + + return true; +} + +/* to protect the working set of the last N jiffies */ +static unsigned long lru_gen_min_ttl __read_mostly; + +static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) +{ + struct mem_cgroup *memcg; + bool success = false; + unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl); + + VM_BUG_ON(!current_is_kswapd()); + + /* + * To reduce the chance of going into the aging path or swapping, which + * can be costly, optimistically skip them unless their corresponding + * flags were cleared in the eviction path. This improves the overall + * performance when multiple memcgs are available. + */ + if (!sc->memcgs_need_aging) { + sc->memcgs_need_aging = true; + sc->memcgs_avoid_swapping = !sc->memcgs_need_swapping; + sc->memcgs_need_swapping = true; + return; + } + + sc->memcgs_need_swapping = true; + sc->memcgs_avoid_swapping = true; + + current->reclaim_state->mm_walk = &pgdat->mm_walk; + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + + if (age_lruvec(lruvec, sc, min_ttl)) + success = true; + + cond_resched(); + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); + + current->reclaim_state->mm_walk = NULL; + + /* + * The main goal is to OOM kill if every generation from all memcgs is + * younger than min_ttl. However, another theoretical possibility is all + * memcgs are either below min or empty. + */ + if (!success && mutex_trylock(&oom_lock)) { + struct oom_control oc = { + .gfp_mask = sc->gfp_mask, + .order = sc->order, + }; + + out_of_memory(&oc); + + mutex_unlock(&oom_lock); + } +} + +/* + * This function exploits spatial locality when shrink_page_list() walks the + * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. + * If the scan was done cacheline efficiently, it adds the PMD entry pointing + * to the PTE table to the Bloom filter. This process is a feedback loop from + * the eviction to the aging. 
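+ *
+ * E.g., assuming MIN_LRU_BATCH is 64, one young PTE found by the rmap
+ * walk causes up to 64 PTEs around it (clamped to the VMA and the PTE
+ * table) to be checked in a single pass, promoting a hot region in
+ * batches rather than one page per rmap walk.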
+ */ +void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +{ + int i; + pte_t *pte; + unsigned long start; + unsigned long end; + unsigned long addr; + struct folio *folio; + struct lru_gen_mm_walk *walk; + int young = 0; + unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {}; + struct mem_cgroup *memcg = page_memcg(pvmw->page); + struct pglist_data *pgdat = page_pgdat(pvmw->page); + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + DEFINE_MAX_SEQ(lruvec); + int old_gen, new_gen = lru_gen_from_seq(max_seq); + + lockdep_assert_held(pvmw->ptl); + VM_BUG_ON_PAGE(PageLRU(pvmw->page), pvmw->page); + + start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start); + end = pmd_addr_end(pvmw->address, pvmw->vma->vm_end); + + if (end - start > MIN_LRU_BATCH * PAGE_SIZE) { + if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2) + end = start + MIN_LRU_BATCH * PAGE_SIZE; + else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2) + start = end - MIN_LRU_BATCH * PAGE_SIZE; + else { + start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2; + end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2; + } + } + + pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE; + + rcu_read_lock(); + arch_enter_lazy_mmu_mode(); + + for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) { + unsigned long pfn = pte_pfn(pte[i]); + + VM_BUG_ON(addr < pvmw->vma->vm_start || addr >= pvmw->vma->vm_end); + + if (!pte_present(pte[i]) || is_zero_pfn(pfn)) + continue; + + if (WARN_ON_ONCE(pte_devmap(pte[i]) || pte_special(pte[i]))) + continue; + + if (!pte_young(pte[i])) + continue; + + VM_BUG_ON(!pfn_valid(pfn)); + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + continue; + + folio = pfn_folio(pfn); + if (folio_nid(folio) != pgdat->node_id) + continue; + + if (folio_memcg_rcu(folio) != memcg) + continue; + + if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i)) + continue; + + young++; + + if (pte_dirty(pte[i]) && !folio_test_dirty(folio) && + !(folio_test_anon(folio) && folio_test_swapbacked(folio) && + !folio_test_swapcache(folio))) + folio_mark_dirty(folio); + + old_gen = folio_lru_gen(folio); + if (old_gen < 0) + folio_set_referenced(folio); + else if (old_gen != new_gen) + __set_bit(i, bitmap); + } + + arch_leave_lazy_mmu_mode(); + rcu_read_unlock(); + + /* feedback from rmap walkers to page table walkers */ + if (suitable_to_scan(i, young)) + update_bloom_filter(lruvec, max_seq, pvmw->pmd); + + walk = current->reclaim_state ? 
current->reclaim_state->mm_walk : NULL; + + if (!walk && bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) { + for_each_set_bit(i, bitmap, MIN_LRU_BATCH) { + folio = page_folio(pte_page(pte[i])); + folio_activate(folio); + } + return; + } + + /* folio_update_gen() requires stable folio_memcg() */ + if (!mem_cgroup_trylock_pages(memcg)) + return; + + if (!walk) { + spin_lock_irq(&lruvec->lru_lock); + new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq); + } + + for_each_set_bit(i, bitmap, MIN_LRU_BATCH) { + folio = page_folio(pte_page(pte[i])); + if (folio_memcg_rcu(folio) != memcg) + continue; + + old_gen = folio_update_gen(folio, new_gen); + if (old_gen < 0 || old_gen == new_gen) + continue; + + if (walk) + update_batch_size(walk, folio, old_gen, new_gen); + else + lru_gen_update_size(lruvec, folio, old_gen, new_gen); + } + + if (!walk) + spin_unlock_irq(&lruvec->lru_lock); + + mem_cgroup_unlock_pages(); +} + +/****************************************************************************** + * the eviction + ******************************************************************************/ + +static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) +{ + bool success; + int gen = folio_lru_gen(folio); + int type = folio_is_file_lru(folio); + int zone = folio_zonenum(folio); + int tier = folio_lru_tier(folio); + int delta = folio_nr_pages(folio); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON_FOLIO(gen >= MAX_NR_GENS, folio); + + if (!folio_evictable(folio)) { + success = lru_gen_del_folio(lruvec, folio, true); + VM_BUG_ON_FOLIO(!success, folio); + folio_set_unevictable(folio); + lruvec_add_folio(lruvec, folio); + __count_vm_events(UNEVICTABLE_PGCULLED, delta); + return true; + } + + if (type == LRU_GEN_FILE && folio_test_anon(folio) && folio_test_dirty(folio)) { + success = lru_gen_del_folio(lruvec, folio, true); + VM_BUG_ON_FOLIO(!success, folio); + folio_set_swapbacked(folio); + lruvec_add_folio_tail(lruvec, folio); + return true; + } + + if (gen != lru_gen_from_seq(lrugen->min_seq[type])) { + list_move(&folio->lru, &lrugen->lists[gen][type][zone]); + return true; + } + + if (tier > tier_idx) { + int hist = lru_hist_from_seq(lrugen->min_seq[type]); + + gen = folio_inc_gen(lruvec, folio, false); + list_move_tail(&folio->lru, &lrugen->lists[gen][type][zone]); + + WRITE_ONCE(lrugen->protected[hist][type][tier - 1], + lrugen->protected[hist][type][tier - 1] + delta); + __mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta); + return true; + } + + if (folio_test_locked(folio) || folio_test_writeback(folio) || + (type == LRU_GEN_FILE && folio_test_dirty(folio))) { + gen = folio_inc_gen(lruvec, folio, true); + list_move(&folio->lru, &lrugen->lists[gen][type][zone]); + return true; + } + + return false; +} + +static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc) +{ + bool success; + + if (!sc->may_unmap && folio_mapped(folio)) + return false; + + if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) && + (folio_test_dirty(folio) || + (folio_test_anon(folio) && !folio_test_swapcache(folio)))) + return false; + + if (!folio_try_get(folio)) + return false; + + if (!folio_test_clear_lru(folio)) { + folio_put(folio); + return false; + } + + success = lru_gen_del_folio(lruvec, folio, true); + VM_BUG_ON_FOLIO(!success, folio); + + return true; +} + +static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, + int type, int tier, struct list_head *list) +{ + int gen, zone; + enum vm_event_item item; + 
int sorted = 0; + int scanned = 0; + int isolated = 0; + int remaining = MAX_LRU_BATCH; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + + VM_BUG_ON(!list_empty(list)); + + if (get_nr_gens(lruvec, type) == MIN_NR_GENS) + return 0; + + gen = lru_gen_from_seq(lrugen->min_seq[type]); + + for (zone = sc->reclaim_idx; zone >= 0; zone--) { + LIST_HEAD(moved); + int skipped = 0; + struct list_head *head = &lrugen->lists[gen][type][zone]; + + while (!list_empty(head)) { + struct folio *folio = lru_to_folio(head); + int delta = folio_nr_pages(folio); + + VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); + VM_BUG_ON_FOLIO(folio_test_active(folio), folio); + VM_BUG_ON_FOLIO(folio_is_file_lru(folio) != type, folio); + VM_BUG_ON_FOLIO(folio_zonenum(folio) != zone, folio); + + scanned += delta; + + if (sort_folio(lruvec, folio, tier)) + sorted += delta; + else if (isolate_folio(lruvec, folio, sc)) { + list_add(&folio->lru, list); + isolated += delta; + } else { + list_move(&folio->lru, &moved); + skipped += delta; + } + + if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH) + break; + } + + if (skipped) { + list_splice(&moved, head); + __count_zid_vm_events(PGSCAN_SKIP, zone, skipped); + } + + if (!remaining || isolated >= MIN_LRU_BATCH) + break; + } + + item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT; + if (!cgroup_reclaim(sc)) { + __count_vm_events(item, isolated); + __count_vm_events(PGREFILL, sorted); + } + __count_memcg_events(memcg, item, isolated); + __count_memcg_events(memcg, PGREFILL, sorted); + __count_vm_events(PGSCAN_ANON + type, isolated); + + /* + * There might not be eligible pages due to reclaim_idx, may_unmap and + * may_writepage. Check the remaining to prevent livelock if there is no + * progress. + */ + return isolated || !remaining ? scanned : 0; +} + +static int get_tier_idx(struct lruvec *lruvec, int type) +{ + int tier; + struct ctrl_pos sp, pv; + + /* + * To leave a margin for fluctuations, use a larger gain factor (1:2). + * This value is chosen because any other tier would have at least twice + * as many refaults as the first tier. + */ + read_ctrl_pos(lruvec, type, 0, 1, &sp); + for (tier = 1; tier < MAX_NR_TIERS; tier++) { + read_ctrl_pos(lruvec, type, tier, 2, &pv); + if (!positive_ctrl_err(&sp, &pv)) + break; + } + + return tier - 1; +} + +static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_idx) +{ + int type, tier; + struct ctrl_pos sp, pv; + int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness }; + + /* + * Compare the first tier of anon with that of file to determine which + * type to scan. Also need to compare other tiers of the selected type + * with the first tier of the other type to determine the last tier (of + * the selected type) to evict. 
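+ *
+ * For example (illustrative numbers only): with swappiness 100, both types
+ * get a gain of 100. If the first tier of anon refaulted 10 of its last
+ * 1000 evictions while the first tier of file refaulted 30 of 1000, file
+ * is refaulting three times as often and anon is picked for eviction.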
+ */ + read_ctrl_pos(lruvec, LRU_GEN_ANON, 0, gain[LRU_GEN_ANON], &sp); + read_ctrl_pos(lruvec, LRU_GEN_FILE, 0, gain[LRU_GEN_FILE], &pv); + type = positive_ctrl_err(&sp, &pv); + + read_ctrl_pos(lruvec, !type, 0, gain[!type], &sp); + for (tier = 1; tier < MAX_NR_TIERS; tier++) { + read_ctrl_pos(lruvec, type, tier, gain[type], &pv); + if (!positive_ctrl_err(&sp, &pv)) + break; + } + + *tier_idx = tier - 1; + + return type; +} + +static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness, + int *type_scanned, struct list_head *list) +{ + int i; + int type; + int scanned; + int tier = -1; + DEFINE_MIN_SEQ(lruvec); + + VM_BUG_ON(!seq_is_valid(lruvec)); + + /* + * Try to make the obvious choice first. When anon and file are both + * available from the same generation, interpret swappiness 1 as file + * first and 200 as anon first. + */ + if (!swappiness) + type = LRU_GEN_FILE; + else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) + type = LRU_GEN_ANON; + else if (swappiness == 1) + type = LRU_GEN_FILE; + else if (swappiness == 200) + type = LRU_GEN_ANON; + else + type = get_type_to_scan(lruvec, swappiness, &tier); + + for (i = !swappiness; i < ANON_AND_FILE; i++) { + if (tier < 0) + tier = get_tier_idx(lruvec, type); + + scanned = scan_folios(lruvec, sc, type, tier, list); + if (scanned) + break; + + type = !type; + tier = -1; + } + + *type_scanned = type; + + return scanned; +} + +static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness, + bool *swapped) +{ + int type; + int scanned; + int reclaimed; + LIST_HEAD(list); + struct folio *folio; + enum vm_event_item item; + struct reclaim_stat stat; + struct lru_gen_mm_walk *walk; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + + spin_lock_irq(&lruvec->lru_lock); + + scanned = isolate_folios(lruvec, sc, swappiness, &type, &list); + + if (try_to_inc_min_seq(lruvec, swappiness)) + scanned++; + + if (get_nr_gens(lruvec, LRU_GEN_FILE) == MIN_NR_GENS) + scanned = 0; + + spin_unlock_irq(&lruvec->lru_lock); + + if (list_empty(&list)) + return scanned; + + reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false); + + /* + * To avoid livelock, don't add rejected pages back to the same lists + * they were isolated from. See lru_gen_add_folio(). + */ + list_for_each_entry(folio, &list, lru) { + if (folio_test_reclaim(folio) && + (folio_test_dirty(folio) || folio_test_writeback(folio))) + folio_clear_active(folio); + else if (folio_is_file_lru(folio) || folio_test_swapcache(folio)) + folio_set_active(folio); + + folio_clear_referenced(folio); + folio_clear_workingset(folio); + } + + spin_lock_irq(&lruvec->lru_lock); + + move_pages_to_lru(lruvec, &list); + + walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL; + if (walk && walk->batched) + reset_batch_size(lruvec, walk); + + item = current_is_kswapd() ? 
PGSTEAL_KSWAPD : PGSTEAL_DIRECT; + if (!cgroup_reclaim(sc)) + __count_vm_events(item, reclaimed); + __count_memcg_events(memcg, item, reclaimed); + __count_vm_events(PGSTEAL_ANON + type, reclaimed); + + spin_unlock_irq(&lruvec->lru_lock); + + mem_cgroup_uncharge_list(&list); + free_unref_page_list(&list); + + sc->nr_reclaimed += reclaimed; + + if (type == LRU_GEN_ANON && swapped) + *swapped = true; + + return scanned; +} + +static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap) +{ + bool need_aging; + long nr_to_scan; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + if (mem_cgroup_below_min(memcg) || + (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim)) + return 0; + + nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, can_swap, &need_aging); + if (!nr_to_scan) + return 0; + + /* reset the priority if the target has been met */ + nr_to_scan >>= sc->nr_reclaimed < sc->nr_to_reclaim ? sc->priority : DEF_PRIORITY; + + if (!mem_cgroup_online(memcg)) + nr_to_scan++; + + if (!nr_to_scan) + return 0; + + if (!need_aging) { + sc->memcgs_need_aging = false; + return nr_to_scan; + } + + /* leave the work to lru_gen_age_node() */ + if (current_is_kswapd()) + return 0; + + /* try other memcgs before going to the aging path */ + if (!cgroup_reclaim(sc) && !sc->force_deactivate) { + sc->skipped_deactivate = true; + return 0; + } + + if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false)) + return nr_to_scan; + + return min_seq[LRU_GEN_FILE] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0; +} + +static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +{ + struct blk_plug plug; + long scanned = 0; + bool swapped = false; + unsigned long reclaimed = sc->nr_reclaimed; + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + + lru_add_drain(); + + blk_start_plug(&plug); + + if (current_is_kswapd()) + current->reclaim_state->mm_walk = &pgdat->mm_walk; + + while (true) { + int delta; + int swappiness; + long nr_to_scan; + + if (sc->may_swap) + swappiness = get_swappiness(lruvec, sc); + else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc)) + swappiness = 1; + else + swappiness = 0; + + nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); + if (!nr_to_scan) + break; + + delta = evict_folios(lruvec, sc, swappiness, &swapped); + if (!delta) + break; + + if (sc->memcgs_avoid_swapping && swappiness < 200 && swapped) + break; + + scanned += delta; + if (scanned >= nr_to_scan) { + if (!swapped && sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH) + sc->memcgs_need_swapping = false; + break; + } + + cond_resched(); + } + + if (current_is_kswapd()) + current->reclaim_state->mm_walk = NULL; + + blk_finish_plug(&plug); +} + +/****************************************************************************** + * state change + ******************************************************************************/ + +static bool __maybe_unused state_is_valid(struct lruvec *lruvec) +{ + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + if (lrugen->enabled) { + enum lru_list lru; + + for_each_evictable_lru(lru) { + if (!list_empty(&lruvec->lists[lru])) + return false; + } + } else { + int gen, type, zone; + + for_each_gen_type_zone(gen, type, zone) { + if (!list_empty(&lrugen->lists[gen][type][zone])) + return false; + + /* unlikely but not a bug when reset_batch_size() is pending */ + VM_WARN_ON(lrugen->nr_pages[gen][type][zone]); + } + } + + return true; +} + +static bool fill_evictable(struct lruvec 
*lruvec)
+{
+ enum lru_list lru;
+ int remaining = MAX_LRU_BATCH;
+
+ for_each_evictable_lru(lru) {
+ int type = is_file_lru(lru);
+ bool active = is_active_lru(lru);
+ struct list_head *head = &lruvec->lists[lru];
+
+ while (!list_empty(head)) {
+ bool success;
+ struct folio *folio = lru_to_folio(head);
+
+ VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio);
+ VM_BUG_ON_FOLIO(folio_test_active(folio) != active, folio);
+ VM_BUG_ON_FOLIO(folio_is_file_lru(folio) != type, folio);
+ VM_BUG_ON_FOLIO(folio_lru_gen(folio) != -1, folio);
+
+ lruvec_del_folio(lruvec, folio);
+ success = lru_gen_add_folio(lruvec, folio, false);
+ VM_BUG_ON(!success);
+
+ if (!--remaining)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool drain_evictable(struct lruvec *lruvec)
+{
+ int gen, type, zone;
+ int remaining = MAX_LRU_BATCH;
+
+ for_each_gen_type_zone(gen, type, zone) {
+ struct list_head *head = &lruvec->lrugen.lists[gen][type][zone];
+
+ while (!list_empty(head)) {
+ bool success;
+ struct folio *folio = lru_to_folio(head);
+
+ VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio);
+ VM_BUG_ON_FOLIO(folio_test_active(folio), folio);
+ VM_BUG_ON_FOLIO(folio_is_file_lru(folio) != type, folio);
+ VM_BUG_ON_FOLIO(folio_zonenum(folio) != zone, folio);
+
+ success = lru_gen_del_folio(lruvec, folio, false);
+ VM_BUG_ON(!success);
+ lruvec_add_folio(lruvec, folio);
+
+ if (!--remaining)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static void lru_gen_change_state(bool enable)
+{
+ static DEFINE_MUTEX(state_mutex);
+
+ struct mem_cgroup *memcg;
+
+ cgroup_lock();
+ cpus_read_lock();
+ get_online_mems();
+ mutex_lock(&state_mutex);
+
+ if (enable == lru_gen_enabled())
+ goto unlock;
+
+ if (enable)
+ static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
+ else
+ static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
+
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ do {
+ int nid;
+
+ for_each_node(nid) {
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
+
+ if (!lruvec)
+ continue;
+
+ spin_lock_irq(&lruvec->lru_lock);
+
+ VM_BUG_ON(!seq_is_valid(lruvec));
+ VM_BUG_ON(!state_is_valid(lruvec));
+
+ lruvec->lrugen.enabled = enable;
+
+ while (!(enable ? 
fill_evictable(lruvec) : drain_evictable(lruvec))) { + spin_unlock_irq(&lruvec->lru_lock); + cond_resched(); + spin_lock_irq(&lruvec->lru_lock); + } + + spin_unlock_irq(&lruvec->lru_lock); + } + + cond_resched(); + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); +unlock: + mutex_unlock(&state_mutex); + put_online_mems(); + cpus_read_unlock(); + cgroup_unlock(); +} + +/****************************************************************************** + * sysfs interface + ******************************************************************************/ + +static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl))); +} + +static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t len) +{ + unsigned int msecs; + + if (kstrtouint(buf, 0, &msecs)) + return -EINVAL; + + WRITE_ONCE(lru_gen_min_ttl, msecs_to_jiffies(msecs)); + + return len; +} + +static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR( + min_ttl_ms, 0644, show_min_ttl, store_min_ttl +); + +static ssize_t show_enable(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + unsigned int caps = 0; + + if (get_cap(LRU_GEN_CORE)) + caps |= BIT(LRU_GEN_CORE); + + if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) + caps |= BIT(LRU_GEN_MM_WALK); + + if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) + caps |= BIT(LRU_GEN_NONLEAF_YOUNG); + + return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); +} + +static ssize_t store_enable(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int i; + unsigned int caps; + + if (tolower(*buf) == 'n') + caps = 0; + else if (tolower(*buf) == 'y') + caps = -1; + else if (kstrtouint(buf, 0, &caps)) + return -EINVAL; + + for (i = 0; i < NR_LRU_GEN_CAPS; i++) { + bool enable = caps & BIT(i); + + if (i == LRU_GEN_CORE) + lru_gen_change_state(enable); + else if (enable) + static_branch_enable(&lru_gen_caps[i]); + else + static_branch_disable(&lru_gen_caps[i]); + } + + return len; +} + +static struct kobj_attribute lru_gen_enabled_attr = __ATTR( + enabled, 0644, show_enable, store_enable +); + +static struct attribute *lru_gen_attrs[] = { + &lru_gen_min_ttl_attr.attr, + &lru_gen_enabled_attr.attr, + NULL +}; + +static struct attribute_group lru_gen_attr_group = { + .name = "lru_gen", + .attrs = lru_gen_attrs, +}; + +/****************************************************************************** + * debugfs interface + ******************************************************************************/ + +static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos) +{ + struct mem_cgroup *memcg; + loff_t nr_to_skip = *pos; + + m->private = kvmalloc(PATH_MAX, GFP_KERNEL); + if (!m->private) + return ERR_PTR(-ENOMEM); + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + int nid; + + for_each_node_state(nid, N_MEMORY) { + if (!nr_to_skip--) + return get_lruvec(memcg, nid); + } + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); + + return NULL; +} + +static void lru_gen_seq_stop(struct seq_file *m, void *v) +{ + if (!IS_ERR_OR_NULL(v)) + mem_cgroup_iter_break(NULL, lruvec_memcg(v)); + + kvfree(m->private); + m->private = NULL; +} + +static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + int nid = lruvec_pgdat(v)->node_id; + struct mem_cgroup *memcg = lruvec_memcg(v); + + ++*pos; + + nid = next_memory_node(nid); + if (nid == MAX_NUMNODES) { + memcg = 
mem_cgroup_iter(NULL, memcg, NULL); + if (!memcg) + return NULL; + + nid = first_memory_node; + } + + return get_lruvec(memcg, nid); +} + +static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, + unsigned long max_seq, unsigned long *min_seq, + unsigned long seq) +{ + int i; + int type, tier; + int hist = lru_hist_from_seq(seq); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + for (tier = 0; tier < MAX_NR_TIERS; tier++) { + seq_printf(m, " %10d", tier); + for (type = 0; type < ANON_AND_FILE; type++) { + unsigned long n[3] = {}; + + if (seq == max_seq) { + n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]); + n[1] = READ_ONCE(lrugen->avg_total[type][tier]); + + seq_printf(m, " %10luR %10luT %10lu ", n[0], n[1], n[2]); + } else if (seq == min_seq[type] || NR_HIST_GENS > 1) { + n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]); + n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]); + if (tier) + n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]); + + seq_printf(m, " %10lur %10lue %10lup", n[0], n[1], n[2]); + } else + seq_puts(m, " 0 0 0 "); + } + seq_putc(m, '\n'); + } + + seq_puts(m, " "); + for (i = 0; i < NR_MM_STATS; i++) { + if (seq == max_seq && NR_HIST_GENS == 1) + seq_printf(m, " %10lu%c", READ_ONCE(lruvec->mm_state.stats[hist][i]), + toupper(MM_STAT_CODES[i])); + else if (seq != max_seq && NR_HIST_GENS > 1) + seq_printf(m, " %10lu%c", READ_ONCE(lruvec->mm_state.stats[hist][i]), + MM_STAT_CODES[i]); + else + seq_puts(m, " 0 "); + } + seq_putc(m, '\n'); +} + +static int lru_gen_seq_show(struct seq_file *m, void *v) +{ + unsigned long seq; + bool full = !debugfs_real_fops(m->file)->write; + struct lruvec *lruvec = v; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + int nid = lruvec_pgdat(lruvec)->node_id; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + if (nid == first_memory_node) { + const char *path = memcg ? m->private : ""; + +#ifdef CONFIG_MEMCG + if (memcg) + cgroup_path(memcg->css.cgroup, m->private, PATH_MAX); +#endif + seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path); + } + + seq_printf(m, " node %5d\n", nid); + + if (!full) + seq = min_seq[LRU_GEN_ANON]; + else if (max_seq >= MAX_NR_GENS) + seq = max_seq - MAX_NR_GENS + 1; + else + seq = 0; + + for (; seq <= max_seq; seq++) { + int gen, type, zone; + unsigned int msecs; + + gen = lru_gen_from_seq(seq); + msecs = jiffies_to_msecs(jiffies - READ_ONCE(lrugen->timestamps[gen])); + + seq_printf(m, " %10lu %10u", seq, msecs); + + for (type = 0; type < ANON_AND_FILE; type++) { + long size = 0; + + if (seq < min_seq[type]) { + seq_puts(m, " -0 "); + continue; + } + + for (zone = 0; zone < MAX_NR_ZONES; zone++) + size += READ_ONCE(lrugen->nr_pages[gen][type][zone]); + + seq_printf(m, " %10lu ", max(size, 0L)); + } + + seq_putc(m, '\n'); + + if (full) + lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq); + } + + return 0; +} + +static const struct seq_operations lru_gen_seq_ops = { + .start = lru_gen_seq_start, + .stop = lru_gen_seq_stop, + .next = lru_gen_seq_next, + .show = lru_gen_seq_show, +}; + +static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc, + bool can_swap, bool full_scan) +{ + DEFINE_MAX_SEQ(lruvec); + + if (seq == max_seq) + try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, full_scan); + + return seq > max_seq ? 
-EINVAL : 0; +} + +static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc, + int swappiness, unsigned long nr_to_reclaim) +{ + struct blk_plug plug; + int err = -EINTR; + DEFINE_MAX_SEQ(lruvec); + + if (seq + MIN_NR_GENS > max_seq) + return -EINVAL; + + sc->nr_reclaimed = 0; + + blk_start_plug(&plug); + + while (!signal_pending(current)) { + DEFINE_MIN_SEQ(lruvec); + + if (seq < min_seq[!swappiness] || sc->nr_reclaimed >= nr_to_reclaim || + !evict_folios(lruvec, sc, swappiness, NULL)) { + err = 0; + break; + } + + cond_resched(); + } + + blk_finish_plug(&plug); + + return err; +} + +static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, + struct scan_control *sc, int swappiness, unsigned long opt) +{ + struct lruvec *lruvec; + int err = -EINVAL; + struct mem_cgroup *memcg = NULL; + + if (!mem_cgroup_disabled()) { + rcu_read_lock(); + memcg = mem_cgroup_from_id(memcg_id); +#ifdef CONFIG_MEMCG + if (memcg && !css_tryget(&memcg->css)) + memcg = NULL; +#endif + rcu_read_unlock(); + + if (!memcg) + goto done; + } + if (memcg_id != mem_cgroup_id(memcg)) + goto done; + + if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY)) + goto done; + + lruvec = get_lruvec(memcg, nid); + + if (swappiness < 0) + swappiness = get_swappiness(lruvec, sc); + else if (swappiness > 200) + goto done; + + switch (cmd) { + case '+': + err = run_aging(lruvec, seq, sc, swappiness, opt); + break; + case '-': + err = run_eviction(lruvec, seq, sc, swappiness, opt); + break; + } +done: + mem_cgroup_put(memcg); + + return err; +} + +static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, + size_t len, loff_t *pos) +{ + void *buf; + char *cur, *next; + unsigned int flags; + int err = 0; + struct scan_control sc = { + .may_writepage = true, + .may_unmap = true, + .may_swap = true, + .reclaim_idx = MAX_NR_ZONES - 1, + .gfp_mask = GFP_KERNEL, + }; + + buf = kvmalloc(len + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, src, len)) { + kvfree(buf); + return -EFAULT; + } + + next = buf; + next[len] = '\0'; + + sc.reclaim_state.mm_walk = alloc_mm_walk(); + if (!sc.reclaim_state.mm_walk) { + kvfree(buf); + return -ENOMEM; + } + + set_task_reclaim_state(current, &sc.reclaim_state); + flags = memalloc_noreclaim_save(); + + while ((cur = strsep(&next, ",;\n"))) { + int n; + int end; + char cmd; + unsigned int memcg_id; + unsigned int nid; + unsigned long seq; + unsigned int swappiness = -1; + unsigned long opt = -1; + + cur = skip_spaces(cur); + if (!*cur) + continue; + + n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid, + &seq, &end, &swappiness, &end, &opt, &end); + if (n < 4 || cur[end]) { + err = -EINVAL; + break; + } + + err = run_cmd(cmd, memcg_id, nid, seq, &sc, swappiness, opt); + if (err) + break; + } + + memalloc_noreclaim_restore(flags); + set_task_reclaim_state(current, NULL); + + free_mm_walk(sc.reclaim_state.mm_walk); + kvfree(buf); + + return err ? 
: len; +} + +static int lru_gen_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &lru_gen_seq_ops); +} + +static const struct file_operations lru_gen_rw_fops = { + .open = lru_gen_seq_open, + .read = seq_read, + .write = lru_gen_seq_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct file_operations lru_gen_ro_fops = { + .open = lru_gen_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/****************************************************************************** + * initialization + ******************************************************************************/ + +void lru_gen_init_lruvec(struct lruvec *lruvec) +{ + int i; + int gen, type, zone; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + lrugen->max_seq = MIN_NR_GENS + 1; + lrugen->enabled = lru_gen_enabled(); + + for (i = 0; i <= MIN_NR_GENS + 1; i++) + lrugen->timestamps[i] = jiffies; + + for_each_gen_type_zone(gen, type, zone) + INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]); + + lruvec->mm_state.seq = MIN_NR_GENS; + init_waitqueue_head(&lruvec->mm_state.wait); +} + +#ifdef CONFIG_MEMCG +void lru_gen_init_memcg(struct mem_cgroup *memcg) +{ + INIT_LIST_HEAD(&memcg->mm_list.fifo); + spin_lock_init(&memcg->mm_list.lock); +} + +void lru_gen_exit_memcg(struct mem_cgroup *memcg) +{ + int i; + int nid; + + for_each_node(nid) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + + VM_BUG_ON(memchr_inv(lruvec->lrugen.nr_pages, 0, + sizeof(lruvec->lrugen.nr_pages))); + + for (i = 0; i < NR_BLOOM_FILTERS; i++) { + bitmap_free(lruvec->mm_state.filters[i]); + lruvec->mm_state.filters[i] = NULL; + } + } +} +#endif + +static int __init init_lru_gen(void) +{ + BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS); + BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS); + BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1); + + if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) + pr_err("lru_gen: failed to create sysfs group\n"); + + debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops); + debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops); + + return 0; +}; +late_initcall(init_lru_gen); + +#else + +static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) +{ +} + +static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +{ +} + +#endif /* CONFIG_LRU_GEN */ + +static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +{ + unsigned long nr[NR_LRU_LISTS]; + unsigned long targets[NR_LRU_LISTS]; + unsigned long nr_to_scan; + enum lru_list lru; + unsigned long nr_reclaimed = 0; + unsigned long nr_to_reclaim = sc->nr_to_reclaim; + struct blk_plug plug; + bool scan_adjusted; + + if (lru_gen_enabled()) { + lru_gen_shrink_lruvec(lruvec, sc); + return; + } + + get_scan_count(lruvec, sc, nr); + + /* Record the original scan target for proportional adjustments later */ + memcpy(targets, nr, sizeof(nr)); + + /* + * Global reclaiming within direct reclaim at DEF_PRIORITY is a normal + * event that can occur when there is little memory pressure e.g. + * multiple streaming readers/writers. Hence, we do not abort scanning + * when the requested number of pages are reclaimed when scanning at + * DEF_PRIORITY on the assumption that the fact we are direct + * reclaiming implies that kswapd is not keeping up and it is best to + * do a batch of work at once. 
For memcg reclaim one check is made to + * abort proportional reclaim if either the file or anon lru has already + * dropped to zero at the first pass. + */ + scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && + sc->priority == DEF_PRIORITY); + + blk_start_plug(&plug); + while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || + nr[LRU_INACTIVE_FILE]) { + unsigned long nr_anon, nr_file, percentage; + unsigned long nr_scanned; + + for_each_evictable_lru(lru) { + if (nr[lru]) { + nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); + nr[lru] -= nr_to_scan; + + nr_reclaimed += shrink_list(lru, nr_to_scan, + lruvec, sc); + } + } + + cond_resched(); + + if (nr_reclaimed < nr_to_reclaim || scan_adjusted) + continue; + + /* + * For kswapd and memcg, reclaim at least the number of pages + * requested. Ensure that the anon and file LRUs are scanned + * proportionally what was requested by get_scan_count(). We + * stop reclaiming one LRU and reduce the amount scanning + * proportional to the original scan target. + */ + nr_file = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE]; + nr_anon = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON]; + + /* + * It's just vindictive to attack the larger once the smaller + * has gone to zero. And given the way we stop scanning the + * smaller below, this makes sure that we only make one nudge + * towards proportionality once we've got nr_to_reclaim. + */ + if (!nr_file || !nr_anon) + break; + + if (nr_file > nr_anon) { + unsigned long scan_target = targets[LRU_INACTIVE_ANON] + + targets[LRU_ACTIVE_ANON] + 1; + lru = LRU_BASE; + percentage = nr_anon * 100 / scan_target; + } else { + unsigned long scan_target = targets[LRU_INACTIVE_FILE] + + targets[LRU_ACTIVE_FILE] + 1; + lru = LRU_FILE; + percentage = nr_file * 100 / scan_target; + } + + /* Stop scanning the smaller of the LRU */ + nr[lru] = 0; + nr[lru + LRU_ACTIVE] = 0; + + /* + * Recalculate the other LRU scan count based on its original + * scan target and the percentage scanning already complete + */ + lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE; + nr_scanned = targets[lru] - nr[lru]; + nr[lru] = targets[lru] * (100 - percentage) / 100; + nr[lru] -= min(nr[lru], nr_scanned); + + lru += LRU_ACTIVE; + nr_scanned = targets[lru] - nr[lru]; + nr[lru] = targets[lru] * (100 - percentage) / 100; + nr[lru] -= min(nr[lru], nr_scanned); + + scan_adjusted = true; + } + blk_finish_plug(&plug); + sc->nr_reclaimed += nr_reclaimed; + + /* + * Even if we did not try to evict anon pages at all, we want to + * rebalance the anon lru active/inactive ratio. + */ + if (can_age_anon_pages(lruvec_pgdat(lruvec), sc) && + inactive_is_low(lruvec, LRU_INACTIVE_ANON)) + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, + sc, LRU_ACTIVE_ANON); +} + +/* Use reclaim/compaction for costly allocs or under memory pressure */ +static bool in_reclaim_compaction(struct scan_control *sc) +{ + if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && + (sc->order > PAGE_ALLOC_COSTLY_ORDER || + sc->priority < DEF_PRIORITY - 2)) + return true; + + return false; +} + +/* + * Reclaim/compaction is used for high-order allocation requests. It reclaims + * order-0 pages before compacting the zone. should_continue_reclaim() returns + * true if more pages should be reclaimed such that when the page allocator + * calls try_to_compact_pages() that it will have enough free pages to succeed. + * It will give up earlier than that if there is difficulty reclaiming pages. 
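+ *
+ * As a rough illustration, assuming the usual compact_gap() heuristic of
+ * 2 << order: an order-9 (THP-sized) request keeps reclaim going until
+ * about 1024 free pages are available for compaction to work with.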
+ */ +static inline bool should_continue_reclaim(struct pglist_data *pgdat, + unsigned long nr_reclaimed, + struct scan_control *sc) +{ + unsigned long pages_for_compaction; + unsigned long inactive_lru_pages; + int z; + + /* If not in reclaim/compaction mode, stop */ + if (!in_reclaim_compaction(sc)) + return false; + + /* + * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX + * number of pages that were scanned. This will return to the caller + * with the risk reclaim/compaction and the resulting allocation attempt * fails. In the past we have tried harder for __GFP_RETRY_MAYFAIL * allocations through requiring that the full LRU list has been scanned * first, by assuming that zero delta of sc->nr_scanned means full LRU @@ -3188,109 +5907,16 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) unsigned long nr_reclaimed, nr_scanned; struct lruvec *target_lruvec; bool reclaimable = false; - unsigned long file; target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); again: - /* - * Flush the memory cgroup stats, so that we read accurate per-memcg - * lruvec stats for heuristics. - */ - mem_cgroup_flush_stats(); - memset(&sc->nr, 0, sizeof(sc->nr)); nr_reclaimed = sc->nr_reclaimed; nr_scanned = sc->nr_scanned; - /* - * Determine the scan balance between anon and file LRUs. - */ - spin_lock_irq(&target_lruvec->lru_lock); - sc->anon_cost = target_lruvec->anon_cost; - sc->file_cost = target_lruvec->file_cost; - spin_unlock_irq(&target_lruvec->lru_lock); - - /* - * Target desirable inactive:active list ratios for the anon - * and file LRU lists. - */ - if (!sc->force_deactivate) { - unsigned long refaults; - - refaults = lruvec_page_state(target_lruvec, - WORKINGSET_ACTIVATE_ANON); - if (refaults != target_lruvec->refaults[0] || - inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) - sc->may_deactivate |= DEACTIVATE_ANON; - else - sc->may_deactivate &= ~DEACTIVATE_ANON; - - /* - * When refaults are being observed, it means a new - * workingset is being established. Deactivate to get - * rid of any stale active pages quickly. - */ - refaults = lruvec_page_state(target_lruvec, - WORKINGSET_ACTIVATE_FILE); - if (refaults != target_lruvec->refaults[1] || - inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) - sc->may_deactivate |= DEACTIVATE_FILE; - else - sc->may_deactivate &= ~DEACTIVATE_FILE; - } else - sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; - - /* - * If we have plenty of inactive file pages that aren't - * thrashing, try to reclaim those first before touching - * anonymous pages. - */ - file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); - if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) - sc->cache_trim_mode = 1; - else - sc->cache_trim_mode = 0; - - /* - * Prevent the reclaimer from falling into the cache trap: as - * cache pages start out inactive, every cache fault will tip - * the scan balance towards the file LRU. And as the file LRU - * shrinks, so does the window for rotation from references. - * This means we have a runaway feedback loop where a tiny - * thrashing file LRU becomes infinitely more attractive than - * anon pages. Try to detect this based on file LRU size. 
- */ - if (!cgroup_reclaim(sc)) { - unsigned long total_high_wmark = 0; - unsigned long free, anon; - int z; - - free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); - file = node_page_state(pgdat, NR_ACTIVE_FILE) + - node_page_state(pgdat, NR_INACTIVE_FILE); - - for (z = 0; z < MAX_NR_ZONES; z++) { - struct zone *zone = &pgdat->node_zones[z]; - if (!managed_zone(zone)) - continue; - - total_high_wmark += high_wmark_pages(zone); - } - - /* - * Consider anon: if that's low too, this isn't a - * runaway file reclaim problem, but rather just - * extreme pressure. Reclaim as per usual then. - */ - anon = node_page_state(pgdat, NR_INACTIVE_ANON); - - sc->file_is_tiny = - file + free <= total_high_wmark && - !(sc->may_deactivate & DEACTIVATE_ANON) && - anon >> sc->priority; - } + prepare_scan_count(pgdat, sc); shrink_node_memcgs(pgdat, sc); @@ -3547,6 +6173,9 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat) struct lruvec *target_lruvec; unsigned long refaults; + if (lru_gen_enabled()) + return; + target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat); refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); target_lruvec->refaults[0] = refaults; @@ -3917,6 +6546,11 @@ static void age_active_anon(struct pglist_data *pgdat, struct mem_cgroup *memcg; struct lruvec *lruvec; + if (lru_gen_enabled()) { + lru_gen_age_node(pgdat, sc); + return; + } + if (!can_age_anon_pages(pgdat, sc)) return; diff --git a/mm/vmstat.c b/mm/vmstat.c index 4057372745d04..9e9536df51b5a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "internal.h" @@ -2043,7 +2044,12 @@ static void __init init_cpu_node_state(void) static int vmstat_cpu_online(unsigned int cpu) { refresh_zone_stat_thresholds(); - node_set_state(cpu_to_node(cpu), N_CPU); + + if (!node_state(cpu_to_node(cpu), N_CPU)) { + node_set_state(cpu_to_node(cpu), N_CPU); + set_migration_target_nodes(); + } + return 0; } @@ -2066,6 +2072,8 @@ static int vmstat_cpu_dead(unsigned int cpu) return 0; node_clear_state(node, N_CPU); + set_migration_target_nodes(); + return 0; } @@ -2097,6 +2105,9 @@ void __init init_mm_internals(void) start_shepherd_timer(); #endif +#if defined(CONFIG_MIGRATION) && defined(CONFIG_HOTPLUG_CPU) + migrate_on_reclaim_init(); +#endif #ifdef CONFIG_PROC_FS proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); diff --git a/mm/workingset.c b/mm/workingset.c index 8c03afe1d67cb..93ee00c7e4d11 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -187,7 +187,6 @@ static unsigned int bucket_order __read_mostly; static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction, bool workingset) { - eviction >>= bucket_order; eviction &= EVICTION_MASK; eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; eviction = (eviction << NODES_SHIFT) | pgdat->node_id; @@ -212,10 +211,116 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, *memcgidp = memcgid; *pgdat = NODE_DATA(nid); - *evictionp = entry << bucket_order; + *evictionp = entry; *workingsetp = workingset; } +#ifdef CONFIG_LRU_GEN + +static int folio_lru_refs(struct folio *folio) +{ + unsigned long flags = READ_ONCE(folio->flags); + + BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT); + + /* see the comment on MAX_NR_TIERS */ + return flags & BIT(PG_workingset) ? 
(flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF : 0; +} + +static void *lru_gen_eviction(struct folio *folio) +{ + int hist, tier; + unsigned long token; + unsigned long min_seq; + struct lruvec *lruvec; + struct lru_gen_struct *lrugen; + int type = folio_is_file_lru(folio); + int refs = folio_lru_refs(folio); + int delta = folio_nr_pages(folio); + bool workingset = folio_test_workingset(folio); + struct mem_cgroup *memcg = folio_memcg(folio); + struct pglist_data *pgdat = folio_pgdat(folio); + + lruvec = mem_cgroup_lruvec(memcg, pgdat); + lrugen = &lruvec->lrugen; + min_seq = READ_ONCE(lrugen->min_seq[type]); + token = (min_seq << LRU_REFS_WIDTH) | refs; + + hist = lru_hist_from_seq(min_seq); + tier = lru_tier_from_refs(refs + workingset); + atomic_long_add(delta, &lrugen->evicted[hist][type][tier]); + + return pack_shadow(mem_cgroup_id(memcg), pgdat, token, workingset); +} + +static void lru_gen_refault(struct folio *folio, void *shadow) +{ + int hist, tier, refs; + int memcg_id; + bool workingset; + unsigned long token; + unsigned long min_seq; + struct lruvec *lruvec; + struct lru_gen_struct *lrugen; + struct mem_cgroup *memcg; + struct pglist_data *pgdat; + int type = folio_is_file_lru(folio); + int delta = folio_nr_pages(folio); + + unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset); + + refs = token & (BIT(LRU_REFS_WIDTH) - 1); + if (refs && !workingset) + return; + + if (folio_pgdat(folio) != pgdat) + return; + + rcu_read_lock(); + memcg = folio_memcg_rcu(folio); + if (mem_cgroup_id(memcg) != memcg_id) + goto unlock; + + token >>= LRU_REFS_WIDTH; + lruvec = mem_cgroup_lruvec(memcg, pgdat); + lrugen = &lruvec->lrugen; + min_seq = READ_ONCE(lrugen->min_seq[type]); + if (token != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH))) + goto unlock; + + hist = lru_hist_from_seq(min_seq); + tier = lru_tier_from_refs(refs + workingset); + atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]); + mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type, delta); + + /* + * Count the following two cases as stalls: + * 1. For pages accessed through page tables, hotter pages pushed out + * hot pages which refaulted immediately. + * 2. For pages accessed through file descriptors, numbers of accesses + * might have been beyond the limit. 
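+ *
+ * E.g., assuming LRU_REFS_WIDTH is 2: refs saturates at 3, so the check
+ * below on refs + workingset == BIT(LRU_REFS_WIDTH) == 4 means the
+ * counter could not record any further accesses before the eviction.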
+ */ + if (lru_gen_in_fault() || refs + workingset == BIT(LRU_REFS_WIDTH)) { + folio_set_workingset(folio); + mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); + } +unlock: + rcu_read_unlock(); +} + +#else + +static void *lru_gen_eviction(struct folio *folio) +{ + return NULL; +} + +static void lru_gen_refault(struct folio *folio, void *shadow) +{ +} + +#endif /* CONFIG_LRU_GEN */ + /** * workingset_age_nonresident - age non-resident entries as LRU ages * @lruvec: the lruvec that was aged @@ -264,10 +369,14 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) VM_BUG_ON_PAGE(page_count(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); + if (lru_gen_enabled()) + return lru_gen_eviction(page_folio(page)); + lruvec = mem_cgroup_lruvec(target_memcg, pgdat); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); + eviction >>= bucket_order; workingset_age_nonresident(lruvec, thp_nr_pages(page)); return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page)); } @@ -297,7 +406,13 @@ void workingset_refault(struct folio *folio, void *shadow) int memcgid; long nr; + if (lru_gen_enabled()) { + lru_gen_refault(folio, shadow); + return; + } + unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset); + eviction <<= bucket_order; rcu_read_lock(); /* diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 6bd0971807721..f5686c463bc0d 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -89,18 +89,20 @@ static void ax25_kill_by_device(struct net_device *dev) sk = s->sk; if (!sk) { spin_unlock_bh(&ax25_list_lock); - s->ax25_dev = NULL; ax25_disconnect(s, ENETUNREACH); + s->ax25_dev = NULL; spin_lock_bh(&ax25_list_lock); goto again; } sock_hold(sk); spin_unlock_bh(&ax25_list_lock); lock_sock(sk); - s->ax25_dev = NULL; - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); - ax25_dev_put(ax25_dev); ax25_disconnect(s, ENETUNREACH); + s->ax25_dev = NULL; + if (sk->sk_socket) { + dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + ax25_dev_put(ax25_dev); + } release_sock(sk); spin_lock_bh(&ax25_list_lock); sock_put(sk); @@ -979,14 +981,16 @@ static int ax25_release(struct socket *sock) { struct sock *sk = sock->sk; ax25_cb *ax25; + ax25_dev *ax25_dev; if (sk == NULL) return 0; sock_hold(sk); - sock_orphan(sk); lock_sock(sk); + sock_orphan(sk); ax25 = sk_to_ax25(sk); + ax25_dev = ax25->ax25_dev; if (sk->sk_type == SOCK_SEQPACKET) { switch (ax25->state) { @@ -1048,6 +1052,10 @@ static int ax25_release(struct socket *sock) sk->sk_state_change(sk); ax25_destroy_socket(ax25); } + if (ax25_dev) { + dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + ax25_dev_put(ax25_dev); + } sock->sk = NULL; release_sock(sk); diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 15ab812c4fe4b..3a476e4f6cd0b 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -261,12 +261,20 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) - ax25_stop_heartbeat(ax25); - ax25_stop_t1timer(ax25); - ax25_stop_t2timer(ax25); - ax25_stop_t3timer(ax25); - ax25_stop_idletimer(ax25); + if (reason == ENETUNREACH) { + del_timer_sync(&ax25->timer); + del_timer_sync(&ax25->t1timer); + del_timer_sync(&ax25->t2timer); + del_timer_sync(&ax25->t3timer); + del_timer_sync(&ax25->idletimer); + } else { + if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_stop_heartbeat(ax25); + 
ax25_stop_t1timer(ax25); + ax25_stop_t2timer(ax25); + ax25_stop_t3timer(ax25); + ax25_stop_idletimer(ax25); + } ax25->state = AX25_STATE_0; diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h index 05e2e917fc254..e5876751f07ed 100644 --- a/net/bluetooth/eir.h +++ b/net/bluetooth/eir.h @@ -15,6 +15,11 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len); u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len); +static inline u16 eir_precalc_len(u8 data_len) +{ + return sizeof(u8) * 2 + data_len; +} + static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) { diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 04ebe901e86f0..3bb2b3b6a1c92 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -669,7 +669,9 @@ static void le_conn_timeout(struct work_struct *work) if (conn->role == HCI_ROLE_SLAVE) { /* Disable LE Advertising */ le_disable_advertising(hdev); + hci_dev_lock(hdev); hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT); + hci_dev_unlock(hdev); return; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index fc30f4c03d292..a105b7317560c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4534,7 +4534,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, if (!info) { bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", HCI_EV_INQUIRY_RESULT_WITH_RSSI); - return; + goto unlock; } bacpy(&data.bdaddr, &info->bdaddr); @@ -4565,7 +4565,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, if (!info) { bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", HCI_EV_INQUIRY_RESULT_WITH_RSSI); - return; + goto unlock; } bacpy(&data.bdaddr, &info->bdaddr); @@ -4587,7 +4587,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", HCI_EV_INQUIRY_RESULT_WITH_RSSI); } - +unlock: hci_dev_unlock(hdev); } @@ -6798,7 +6798,7 @@ static const struct hci_ev { HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt, sizeof(struct hci_ev_num_comp_blocks)), /* [0xff = HCI_EV_VENDOR] */ - HCI_EV(HCI_EV_VENDOR, msft_vendor_evt, 0), + HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE), }; static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb, @@ -6823,8 +6823,9 @@ static void hci_event_func(struct hci_dev *hdev, u8 event, struct sk_buff *skb, * decide if that is acceptable. 
*/ if (skb->len > ev->max_len) - bt_dev_warn(hdev, "unexpected event 0x%2.2x length: %u > %u", - event, skb->len, ev->max_len); + bt_dev_warn_ratelimited(hdev, + "unexpected event 0x%2.2x length: %u > %u", + event, skb->len, ev->max_len); data = hci_ev_skb_pull(hdev, skb, event, ev->min_len); if (!data) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index ab9aa700b6b33..405d48c3e63ed 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2806,6 +2806,9 @@ static int hci_set_event_filter_sync(struct hci_dev *hdev, u8 flt_type, if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; + if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + return 0; + memset(&cp, 0, sizeof(cp)); cp.flt_type = flt_type; @@ -2826,6 +2829,13 @@ static int hci_clear_event_filter_sync(struct hci_dev *hdev) if (!hci_dev_test_flag(hdev, HCI_EVENT_FILTER_CONFIGURED)) return 0; + /* In theory the state machine should not reach here unless + * a hci_set_event_filter_sync() call succeeds, but we do + * the check both for parity and as a future reminder. + */ + if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + return 0; + return hci_set_event_filter_sync(hdev, HCI_FLT_CLEAR_ALL, 0x00, BDADDR_ANY, 0x00); } @@ -4422,7 +4432,7 @@ static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason) return err; } - return err; + return 0; } /* This function perform power off HCI command sequence as follows: @@ -4825,6 +4835,12 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev) if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; + /* Some fake CSR controllers lock up after setting this type of + * filter, so avoid sending the request altogether. + */ + if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) + return 0; + /* Always clear event filter when starting */ hci_clear_event_filter_sync(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 230a7a8196c07..15eab8b968ce8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9086,12 +9086,14 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, u16 eir_len = 0; u32 flags = 0; + /* allocate buff for LE or BR/EDR adv */ if (conn->le_adv_data_len > 0) skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED, - conn->le_adv_data_len); + sizeof(*ev) + conn->le_adv_data_len); else skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED, - 2 + name_len + 5); + sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) + + eir_precalc_len(sizeof(conn->dev_class))); ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, &conn->dst); @@ -9707,13 +9709,11 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, { struct sk_buff *skb; struct mgmt_ev_device_found *ev; - u16 eir_len; - u32 flags; + u16 eir_len = 0; + u32 flags = 0; - if (name_len) - skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 2 + name_len); - else - skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, 0); + skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, + sizeof(*ev) + (name ? 
eir_precalc_len(name_len) : 0)); ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, bdaddr); @@ -9723,10 +9723,8 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, if (name) { eir_len = eir_append_data(ev->eir, 0, EIR_NAME_COMPLETE, name, name_len); - flags = 0; skb_put(skb, eir_len); } else { - eir_len = 0; flags = MGMT_DEV_FOUND_NAME_REQUEST_FAILED; } diff --git a/net/can/isotp.c b/net/can/isotp.c index d2a430b6a13bd..a95d171b3a64b 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1005,26 +1005,29 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, { struct sock *sk = sock->sk; struct sk_buff *skb; - int err = 0; - int noblock; + struct isotp_sock *so = isotp_sk(sk); + int noblock = flags & MSG_DONTWAIT; + int ret = 0; - noblock = flags & MSG_DONTWAIT; - flags &= ~MSG_DONTWAIT; + if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK)) + return -EINVAL; - skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!so->bound) + return -EADDRNOTAVAIL; + + flags &= ~MSG_DONTWAIT; + skb = skb_recv_datagram(sk, flags, noblock, &ret); if (!skb) - return err; + return ret; if (size < skb->len) msg->msg_flags |= MSG_TRUNC; else size = skb->len; - err = memcpy_to_msg(msg, skb->data, size); - if (err < 0) { - skb_free_datagram(sk, skb); - return err; - } + ret = memcpy_to_msg(msg, skb->data, size); + if (ret < 0) + goto out_err; sock_recv_timestamp(msg, sk, skb); @@ -1034,9 +1037,13 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } + /* set length of return value */ + ret = (flags & MSG_TRUNC) ? skb->len : size; + +out_err: skb_free_datagram(sk, skb); - return size; + return ret; } static int isotp_release(struct socket *sock) @@ -1104,6 +1111,7 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) struct net *net = sock_net(sk); int ifindex; struct net_device *dev; + canid_t tx_id, rx_id; int err = 0; int notify_enetdown = 0; int do_rx_reg = 1; @@ -1111,8 +1119,18 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (len < ISOTP_MIN_NAMELEN) return -EINVAL; - if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) - return -EADDRNOTAVAIL; + /* sanitize tx/rx CAN identifiers */ + tx_id = addr->can_addr.tp.tx_id; + if (tx_id & CAN_EFF_FLAG) + tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK); + else + tx_id &= CAN_SFF_MASK; + + rx_id = addr->can_addr.tp.rx_id; + if (rx_id & CAN_EFF_FLAG) + rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK); + else + rx_id &= CAN_SFF_MASK; if (!addr->can_ifindex) return -ENODEV; @@ -1124,21 +1142,13 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) do_rx_reg = 0; /* do not validate rx address for functional addressing */ - if (do_rx_reg) { - if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) { - err = -EADDRNOTAVAIL; - goto out; - } - - if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) { - err = -EADDRNOTAVAIL; - goto out; - } + if (do_rx_reg && rx_id == tx_id) { + err = -EADDRNOTAVAIL; + goto out; } if (so->bound && addr->can_ifindex == so->ifindex && - addr->can_addr.tp.rx_id == so->rxid && - addr->can_addr.tp.tx_id == so->txid) + rx_id == so->rxid && tx_id == so->txid) goto out; dev = dev_get_by_index(net, addr->can_ifindex); @@ -1162,8 +1172,7 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) ifindex = dev->ifindex; if (do_rx_reg) - can_rx_register(net, dev, addr->can_addr.tp.rx_id, - 
SINGLE_MASK(addr->can_addr.tp.rx_id), + can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id), isotp_rcv, sk, "isotp", sk); dev_put(dev); @@ -1183,8 +1192,8 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) /* switch to new settings */ so->ifindex = ifindex; - so->rxid = addr->can_addr.tp.rx_id; - so->txid = addr->can_addr.tp.tx_id; + so->rxid = rx_id; + so->txid = tx_id; so->bound = 1; out: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ea51e23e9247e..a8a2fb745274c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -201,7 +201,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data, skb->head = data; skb->data = data; skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; + skb_set_end_offset(skb, size); skb->mac_header = (typeof(skb->mac_header))~0U; skb->transport_header = (typeof(skb->transport_header))~0U; @@ -1736,11 +1736,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->head = data; skb->head_frag = 0; skb->data += off; + + skb_set_end_offset(skb, size); #ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; off = nhead; -#else - skb->end = skb->head + size; #endif skb->tail += off; skb_headers_offset_update(skb, nhead); @@ -1788,6 +1787,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) } EXPORT_SYMBOL(skb_realloc_headroom); +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) +{ + unsigned int saved_end_offset, saved_truesize; + struct skb_shared_info *shinfo; + int res; + + saved_end_offset = skb_end_offset(skb); + saved_truesize = skb->truesize; + + res = pskb_expand_head(skb, 0, 0, pri); + if (res) + return res; + + skb->truesize = saved_truesize; + + if (likely(skb_end_offset(skb) == saved_end_offset)) + return 0; + + shinfo = skb_shinfo(skb); + + /* We are about to change back skb->end, + * we need to move skb_shinfo() to its new location. 
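+ * Only the live prefix of the struct has to move: everything up to and
+ * including the last in-use frag, which is the length that the
+ * offsetof(..., frags[shinfo->nr_frags]) argument below computes.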
+ */ + memmove(skb->head + saved_end_offset, + shinfo, + offsetof(struct skb_shared_info, frags[shinfo->nr_frags])); + + skb_set_end_offset(skb, saved_end_offset); + + return 0; +} + /** * skb_expand_head - reallocate header of &sk_buff * @skb: buffer to reallocate @@ -6044,11 +6075,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb->head = data; skb->data = data; skb->head_frag = 0; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; -#else - skb->end = skb->head + size; -#endif + skb_set_end_offset(skb, size); skb_set_tail_pointer(skb, skb_headlen(skb)); skb_headers_offset_update(skb, 0); skb->cloned = 0; @@ -6186,11 +6213,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, skb->head = data; skb->head_frag = 0; skb->data = data; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; -#else - skb->end = skb->head + size; -#endif + skb_set_end_offset(skb, size); skb_reset_tail_pointer(skb); skb_headers_offset_update(skb, 0); skb->cloned = 0; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 929a2b096b04e..cc381165ea080 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -27,6 +27,7 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, int elem_first_coalesce) { struct page_frag *pfrag = sk_page_frag(sk); + u32 osize = msg->sg.size; int ret = 0; len -= msg->sg.size; @@ -35,13 +36,17 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, u32 orig_offset; int use, i; - if (!sk_page_frag_refill(sk, pfrag)) - return -ENOMEM; + if (!sk_page_frag_refill(sk, pfrag)) { + ret = -ENOMEM; + goto msg_trim; + } orig_offset = pfrag->offset; use = min_t(int, len, pfrag->size - orig_offset); - if (!sk_wmem_schedule(sk, use)) - return -ENOMEM; + if (!sk_wmem_schedule(sk, use)) { + ret = -ENOMEM; + goto msg_trim; + } i = msg->sg.end; sk_msg_iter_var_prev(i); @@ -71,6 +76,10 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, } return ret; + +msg_trim: + sk_msg_trim(sk, msg, osize); + return ret; } EXPORT_SYMBOL_GPL(sk_msg_alloc); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 88e2808019b47..a39bbed77f87d 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1722,6 +1722,10 @@ void dsa_switch_shutdown(struct dsa_switch *ds) struct dsa_port *dp; mutex_lock(&dsa2_mutex); + + if (!ds->setup) + goto out; + rtnl_lock(); dsa_switch_for_each_user_port(dp, ds) { @@ -1738,6 +1742,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds) dp->master->dsa_ptr = NULL; rtnl_unlock(); +out: mutex_unlock(&dsa2_mutex); } EXPORT_SYMBOL_GPL(dsa_switch_shutdown); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index e3c7d2627a619..517cc83d13cc8 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -113,26 +113,15 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, return dsa_tag_8021q_bridge_join(ds, info); } -static int dsa_switch_bridge_leave(struct dsa_switch *ds, - struct dsa_notifier_bridge_info *info) +static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) { - struct dsa_switch_tree *dst = ds->dst; struct netlink_ext_ack extack = {0}; bool change_vlan_filtering = false; bool vlan_filtering; struct dsa_port *dp; int err; - if (dst->index == info->tree_index && ds->index == info->sw_index && - ds->ops->port_bridge_leave) - ds->ops->port_bridge_leave(ds, info->port, info->bridge); - - if ((dst->index != info->tree_index || ds->index != info->sw_index) && - ds->ops->crosschip_bridge_leave) - ds->ops->crosschip_bridge_leave(ds, info->tree_index, - info->sw_index, 
info->port, - info->bridge); - if (ds->needs_standalone_vlan_filtering && !br_vlan_enabled(info->bridge.dev)) { change_vlan_filtering = true; @@ -172,6 +161,31 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, return err; } + return 0; +} + +static int dsa_switch_bridge_leave(struct dsa_switch *ds, + struct dsa_notifier_bridge_info *info) +{ + struct dsa_switch_tree *dst = ds->dst; + int err; + + if (dst->index == info->tree_index && ds->index == info->sw_index && + ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, info->port, info->bridge); + + if ((dst->index != info->tree_index || ds->index != info->sw_index) && + ds->ops->crosschip_bridge_leave) + ds->ops->crosschip_bridge_leave(ds, info->tree_index, + info->sw_index, info->port, + info->bridge); + + if (ds->dst->index == info->tree_index && ds->index == info->sw_index) { + err = dsa_switch_sync_vlan_filtering(ds, info); + if (err) + return err; + } + return dsa_tag_8021q_bridge_leave(ds, info); } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 87983e70f03f3..a833a7a67ce79 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -669,6 +669,24 @@ config TCP_CONG_BBR AQM schemes that do not provide a delay signal. It requires the fq ("Fair Queue") pacing packet scheduler. +config TCP_CONG_BBR2 + tristate "BBR2 TCP" + default n + help + + BBR2 TCP congestion control is a model-based congestion control + algorithm that aims to maximize network utilization, keep queues and + retransmit rates low, and to be able to coexist with Reno/CUBIC in + common scenarios. It builds an explicit model of the network path. It + tolerates a targeted degree of random packet loss and delay that are + unrelated to congestion. It can operate over LAN, WAN, cellular, wifi, + or cable modem links, and can use DCTCP-L4S-style ECN signals. It can + coexist with flows that use loss-based congestion control, and can + operate with shallow buffers, deep buffers, bufferbloat, policers, or + AQM schemes that do not provide a delay signal. It requires pacing, + using either TCP internal pacing or the fq ("Fair Queue") pacing packet + scheduler. 
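Editor's usage sketch, not part of the patch: with a kernel built with CONFIG_TCP_CONG_BBR2=y (or the tcp_bbr2 module loaded), BBR2 can be selected system-wide via "sysctl net.ipv4.tcp_congestion_control=bbr2", or per socket with the standard TCP_CONGESTION socket option as below; the setsockopt() fails with ENOENT if the algorithm is unavailable.

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <sys/socket.h>

/* Editor's example: opt one TCP socket into BBR2. */
static int use_bbr2(int fd)
{
	static const char name[] = "bbr2";

	/* TCP_CONGESTION takes the algorithm name as a string. */
	return setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
			  name, strlen(name));
}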
+ choice prompt "Default TCP congestion control" default DEFAULT_CUBIC @@ -706,6 +724,9 @@ choice config DEFAULT_BBR bool "BBR" if TCP_CONG_BBR=y + config DEFAULT_BBR2 + bool "BBR2" if TCP_CONG_BBR2=y + config DEFAULT_RENO bool "Reno" endchoice @@ -730,6 +751,7 @@ config DEFAULT_TCP_CONG default "dctcp" if DEFAULT_DCTCP default "cdg" if DEFAULT_CDG default "bbr" if DEFAULT_BBR + default "bbr2" if DEFAULT_BBR2 default "cubic" config TCP_MD5SIG diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index bbdd9c44f14e3..8dee1547d8202 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o +obj-$(CONFIG_TCP_CONG_BBR2) += tcp_bbr2.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index de610cb83694f..afda34c26c627 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -20,7 +20,7 @@ static u32 optional_ops[] = { offsetof(struct tcp_congestion_ops, cwnd_event), offsetof(struct tcp_congestion_ops, in_ack_event), offsetof(struct tcp_congestion_ops, pkts_acked), - offsetof(struct tcp_congestion_ops, min_tso_segs), + offsetof(struct tcp_congestion_ops, tso_segs), offsetof(struct tcp_congestion_ops, sndbuf_expand), offsetof(struct tcp_congestion_ops, cong_control), }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f33ad1f383b68..d5d058de36646 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -499,6 +499,15 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); +static void ip_rt_fix_tos(struct flowi4 *fl4) +{ + __u8 tos = RT_FL_TOS(fl4); + + fl4->flowi4_tos = tos & IPTOS_RT_MASK; + fl4->flowi4_scope = tos & RTO_ONLINK ? + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; +} + static void __build_flow_key(const struct net *net, struct flowi4 *fl4, const struct sock *sk, const struct iphdr *iph, @@ -824,6 +833,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); + ip_rt_fix_tos(&fl4); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1048,6 +1058,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); + ip_rt_fix_tos(&fl4); /* Don't make lookup fail for bridged encapsulations */ if (skb && netif_is_any_bridge_port(skb->dev)) @@ -1122,6 +1133,8 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; new = true; + } else { + ip_rt_fix_tos(&fl4); } __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); @@ -2603,7 +2616,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, const struct sk_buff *skb) { - __u8 tos = RT_FL_TOS(fl4); struct fib_result res = { .type = RTN_UNSPEC, .fi = NULL, @@ -2613,9 +2625,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, struct rtable *rth; fl4->flowi4_iif = LOOPBACK_IFINDEX; - fl4->flowi4_tos = tos & IPTOS_RT_MASK; - fl4->flowi4_scope = ((tos & RTO_ONLINK) ? 
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); + ip_rt_fix_tos(fl4); rcu_read_lock(); rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 28ff2a820f7c9..857f7cef2bd49 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3085,6 +3085,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; tp->rcv_ooopack = 0; + tp->fast_ack_mode = 0; /* Clean up fastopen related fields */ diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index ec5550089b4d2..56e719166d4c3 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -294,26 +294,40 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) sk->sk_pacing_rate = rate; } -/* override sysctl_tcp_min_tso_segs */ static u32 bbr_min_tso_segs(struct sock *sk) { return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; } +/* Return the number of segments BBR would like in a TSO/GSO skb, given + * a particular max gso size as a constraint. + */ +static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, + u32 gso_max_size) +{ + u32 segs; + u64 bytes; + + /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ + bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; + + bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk)); + return segs; +} + +/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ +static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) +{ + return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); +} + +/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - u32 segs, bytes; - - /* Sort of tcp_tso_autosize() but ignoring - * driver provided sk_gso_max_size. - */ - bytes = min_t(unsigned long, - sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), - GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); - segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - return min(segs, 0x7FU); + return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ @@ -1149,7 +1163,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, - .min_tso_segs = bbr_min_tso_segs, + .tso_segs = bbr_tso_segs, .get_info = bbr_get_info, .set_state = bbr_set_state, }; diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c new file mode 100644 index 0000000000000..fa49e17c47ca9 --- /dev/null +++ b/net/ipv4/tcp_bbr2.c @@ -0,0 +1,2674 @@ +/* BBR (Bottleneck Bandwidth and RTT) congestion control, v2 + * + * BBRv2 is a model-based congestion control algorithm that aims for low + * queues, low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model + * of the network path, it uses measurements of bandwidth and RTT, as well as + * (if they occur) packet loss and/or DCTCP/L4S-style ECN signals. Note that + * although it can use ECN or loss signals explicitly, it does not require + * either; it can bound its in-flight data based on its estimate of the BDP. 
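 *
 * Editor's note (illustrative, not in the original patch): concretely,
 * even when ECN and loss signals are absent, cwnd stays bounded near
 * gain * BDP, where BDP = estimated_bw * min_rtt (see bbr_bdp() below).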
+ * + * The model has both higher and lower bounds for the operating range: + * lo: bw_lo, inflight_lo: conservative short-term lower bound + * hi: bw_hi, inflight_hi: robust long-term upper bound + * The bandwidth-probing time scale is (a) extended dynamically based on + * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by + * an interactive wall-clock time-scale to be more scalable and responsive + * than Reno and CUBIC. + * + * Here is a state transition diagram for BBR: + * + * | + * V + * +---> STARTUP ----+ + * | | | + * | V | + * | DRAIN ----+ + * | | | + * | V | + * +---> PROBE_BW ----+ + * | ^ | | + * | | | | + * | +----+ | + * | | + * +---- PROBE_RTT <--+ + * + * A BBR flow starts in STARTUP, and ramps up its sending rate quickly. + * When it estimates the pipe is full, it enters DRAIN to drain the queue. + * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT. + * A long-lived BBR flow spends the vast majority of its time remaining + * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth + * in a fair manner, with a small, bounded queue. *If* a flow has been + * continuously sending for the entire min_rtt window, and hasn't seen an RTT + * sample that matches or decreases its min_rtt estimate for 10 seconds, then + * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe + * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if + * we estimated that we reached the full bw of the pipe then we enter PROBE_BW; + * otherwise we enter STARTUP to try to fill the pipe. + * + * BBR is described in detail in: + * "BBR: Congestion-Based Congestion Control", + * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, + * Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016. + * + * There is a public e-mail list for discussing BBR development and testing: + * https://groups.google.com/forum/#!forum/bbr-dev + * + * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled, + * otherwise TCP stack falls back to an internal pacing using one high + * resolution timer per TCP socket and may use more resources. + */ +#include +#include +#include +#include +#include + +#include "tcp_dctcp.h" + +/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth + * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. + * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. + * Since the minimum window is >=4 packets, the lower bound isn't + * an issue. The upper bound isn't an issue with existing technologies. + */ +#define BW_SCALE 24 +#define BW_UNIT (1 << BW_SCALE) + +#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */ +#define BBR_UNIT (1 << BBR_SCALE) + +#define FLAG_DEBUG_VERBOSE 0x1 /* Verbose debugging messages */ +#define FLAG_DEBUG_LOOPBACK 0x2 /* Do NOT skip loopback addr */ + +#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ + +/* BBR has the following modes for deciding how fast to send: */ +enum bbr_mode { + BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ + BBR_DRAIN, /* drain any queue created during startup */ + BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ + BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ +}; + +/* How does the incoming ACK stream relate to our bandwidth probing? */ +enum bbr_ack_phase { + BBR_ACKS_INIT, /* not probing; not getting probe feedback */ + BBR_ACKS_REFILLING, /* sending at est. 
bw to fill pipe */ + BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */ + BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */ + BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */ +}; + +/* BBR congestion control block */ +struct bbr { + u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ + u32 min_rtt_stamp; /* timestamp of min_rtt_us */ + u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ + u32 probe_rtt_min_us; /* min RTT in bbr_probe_rtt_win_ms window */ + u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/ + u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ + u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */ + u64 cycle_mstamp; /* time of this cycle phase start */ + u32 mode:3, /* current bbr_mode in state machine */ + prev_ca_state:3, /* CA state on previous ACK */ + packet_conservation:1, /* use packet conservation? */ + round_start:1, /* start of packet-timed tx->ack round? */ + ce_state:1, /* If most recent data has CE bit set */ + bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */ + try_fast_path:1, /* can we take fast path? */ + unused2:11, + idle_restart:1, /* restarting after idle? */ + probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ + cycle_idx:3, /* current index in pacing_gain cycle array */ + has_seen_rtt:1; /* have we seen an RTT sample yet? */ + u32 pacing_gain:11, /* current gain for setting pacing rate */ + cwnd_gain:11, /* current gain for setting cwnd */ + full_bw_reached:1, /* reached full bw in Startup? */ + full_bw_cnt:2, /* number of rounds without large bw gains */ + init_cwnd:7; /* initial cwnd */ + u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ + u32 full_bw; /* recent bw, to estimate if pipe is full */ + + /* For tracking ACK aggregation: */ + u64 ack_epoch_mstamp; /* start of ACK sampling epoch */ + u16 extra_acked[2]; /* max excess data ACKed in epoch */ + u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ + extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ + extra_acked_win_idx:1, /* current index in extra_acked array */ + /* BBR v2 state: */ + unused1:2, + startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */ + loss_in_cycle:1, /* packet loss in this cycle? */ + ecn_in_cycle:1; /* ECN in this cycle? */ + u32 loss_round_delivered; /* scb->tx.delivered ending loss round */ + u32 undo_bw_lo; /* bw_lo before latest losses */ + u32 undo_inflight_lo; /* inflight_lo before latest losses */ + u32 undo_inflight_hi; /* inflight_hi before latest losses */ + u32 bw_latest; /* max delivered bw in last round trip */ + u32 bw_lo; /* lower bound on sending bandwidth */ + u32 bw_hi[2]; /* upper bound of sending bandwidth range*/ + u32 inflight_latest; /* max delivered data in last round trip */ + u32 inflight_lo; /* lower bound of inflight data range */ + u32 inflight_hi; /* upper bound of inflight data range */ + u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */ + u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */ + u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */ + u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */ + ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */ + bw_probe_samples:1, /* rate samples reflect bw probing? */ + prev_probe_too_high:1, /* did last PROBE_UP go too high? */ + stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? 
*/
+		rounds_since_probe:8,	/* packet-timed rounds since probed bw */
+		loss_round_start:1,	/* loss_round_delivered round trip? */
+		loss_in_round:1,	/* loss marked in this round trip? */
+		ecn_in_round:1,		/* ECN marked in this round trip? */
+		ack_phase:3,		/* bbr_ack_phase: meaning of ACKs */
+		loss_events_in_round:4,/* losses in STARTUP round */
+		initialized:1;		/* has bbr_init() been called? */
+	u32	alpha_last_delivered;	 /* tp->delivered at alpha update */
+	u32	alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */
+
+	/* Params configurable using setsockopt. Refer to corresponding
+	 * module param for detailed description of params.
+	 */
+	struct bbr_params {
+		u32	high_gain:11,		/* max allowed value: 2047 */
+			drain_gain:10,		/* max allowed value: 1023 */
+			cwnd_gain:11;		/* max allowed value: 2047 */
+		u32	cwnd_min_target:4,	/* max allowed value: 15 */
+			min_rtt_win_sec:5,	/* max allowed value: 31 */
+			probe_rtt_mode_ms:9,	/* max allowed value: 511 */
+			full_bw_cnt:3,		/* max allowed value: 7 */
+			cwnd_tso_budget:1,	/* allowed values: {0, 1} */
+			unused3:6,
+			drain_to_target:1,	/* boolean */
+			precise_ece_ack:1,	/* boolean */
+			extra_acked_in_startup:1, /* allowed values: {0, 1} */
+			fast_path:1;		/* boolean */
+		u32	full_bw_thresh:10,	/* max allowed value: 1023 */
+			startup_cwnd_gain:11,	/* max allowed value: 2047 */
+			bw_probe_pif_gain:9,	/* max allowed value: 511 */
+			usage_based_cwnd:1,	/* boolean */
+			unused2:1;
+		u16	probe_rtt_win_ms:14,	/* max allowed value: 16383 */
+			refill_add_inc:2;	/* max allowed value: 3 */
+		u16	extra_acked_gain:11,	/* max allowed value: 2047 */
+			extra_acked_win_rtts:5;	/* max allowed value: 31 */
+		u16	pacing_gain[CYCLE_LEN]; /* max allowed value: 1023 */
+		/* Mostly BBR v2 parameters below here: */
+		u32	ecn_alpha_gain:8,	/* max allowed value: 255 */
+			ecn_factor:8,		/* max allowed value: 255 */
+			ecn_thresh:8,		/* max allowed value: 255 */
+			beta:8;			/* max allowed value: 255 */
+		u32	ecn_max_rtt_us:19,	/* max allowed value: 524287 */
+			bw_probe_reno_gain:9,	/* max allowed value: 511 */
+			full_loss_cnt:4;	/* max allowed value: 15 */
+		u32	probe_rtt_cwnd_gain:8,	/* max allowed value: 255 */
+			inflight_headroom:8,	/* max allowed value: 255 */
+			loss_thresh:8,		/* max allowed value: 255 */
+			bw_probe_max_rounds:8;	/* max allowed value: 255 */
+		u32	bw_probe_rand_rounds:4, /* max allowed value: 15 */
+			bw_probe_base_us:26,	/* usecs: 0..2^26-1 (67 secs) */
+			full_ecn_cnt:2;		/* max allowed value: 3 */
+		u32	bw_probe_rand_us:26,	/* usecs: 0..2^26-1 (67 secs) */
+			undo:1,			/* boolean */
+			tso_rtt_shift:4,	/* max allowed value: 15 */
+			unused5:1;
+		u32	ecn_reprobe_gain:9,	/* max allowed value: 511 */
+			unused1:14,
+			ecn_alpha_init:9;	/* max allowed value: 256 */
+	} params;
+
+	struct {
+		u32	snd_isn; /* Initial sequence number */
+		u32	rs_bw;	 /* last valid rate sample bw */
+		u32	target_cwnd; /* target cwnd, based on BDP */
+		u8	undo:1,	 /* Undo event happened but not yet logged */
+			unused:7;
+		char	event;	 /* single-letter event debug codes */
+		u16	unused2;
+	} debug;
+};
+
+struct bbr_context {
+	u32 sample_bw;
+	u32 target_cwnd;
+	u32 log:1;
+};
+
+/* Window length of min_rtt filter (in sec). Max allowed value is 31 (0x1F) */
+static u32 bbr_min_rtt_win_sec = 10;
+/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode.
+ * Max allowed value is 511 (0x1FF).
+ */
+static u32 bbr_probe_rtt_mode_ms = 200;
+/* Window length of probe_rtt_min_us filter (in ms), and consequently the
+ * typical interval between PROBE_RTT mode entries.
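+ * Editor's note (illustrative, not in the original patch): with the
+ * defaults below (5000 ms here vs. a 10 s min_rtt window), a
+ * continuously busy flow refreshes its min_rtt estimate roughly twice
+ * per min_rtt filter window.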
+ * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC + */ +static u32 bbr_probe_rtt_win_ms = 5000; +/* Skip TSO below the following bandwidth (bits/sec): */ +static int bbr_min_tso_rate = 1200000; + +/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting + * in bigger TSO bursts. By default we cut the RTT-based allowance in half + * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance + * is below 1500 bytes after 6 * ~500 usec = 3ms. + */ +static u32 bbr_tso_rtt_shift = 9; /* halve allowance per 2^9 usecs, 512us */ + +/* Select cwnd TSO budget approach: + * 0: padding + * 1: flooring + */ +static uint bbr_cwnd_tso_budget = 1; + +/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. + * In order to help drive the network toward lower queues and low latency while + * maintaining high utilization, the average pacing rate aims to be slightly + * lower than the estimated bandwidth. This is an important aspect of the + * design. + */ +static const int bbr_pacing_margin_percent = 1; + +/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain + * that will allow a smoothly increasing pacing rate that will double each RTT + * and send the same number of packets per RTT that an un-paced, slow-starting + * Reno or CUBIC flow would. Max allowed value is 2047 (0x7FF). + */ +static int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; +/* The gain for deriving startup cwnd. Max allowed value is 2047 (0x7FF). */ +static int bbr_startup_cwnd_gain = BBR_UNIT * 2885 / 1000 + 1; +/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain + * the queue created in BBR_STARTUP in a single round. Max allowed value + * is 1023 (0x3FF). + */ +static int bbr_drain_gain = BBR_UNIT * 1000 / 2885; +/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs. + * Max allowed value is 2047 (0x7FF). + */ +static int bbr_cwnd_gain = BBR_UNIT * 2; +/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw. + * Max allowed value for each element is 1023 (0x3FF). + */ +enum bbr_pacing_gain_phase { + BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */ + BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */ + BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */ + BBR_BW_PROBE_REFILL = 3, /* v2: refill the pipe again to 100% */ +}; +static int bbr_pacing_gain[] = { + BBR_UNIT * 5 / 4, /* probe for more available bw */ + BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ + BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ + BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */ +}; + +/* Try to keep at least this many packets in flight, if things go smoothly. For + * smooth functioning, a sliding window protocol ACKing every other packet + * needs at least 4 packets in flight. Max allowed value is 15 (0xF). + */ +static u32 bbr_cwnd_min_target = 4; + +/* Cwnd to BDP proportion in PROBE_RTT mode scaled by BBR_UNIT. Default: 50%. + * Use 0 to disable. Max allowed value is 255. + */ +static u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2; + +/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ +/* If bw has increased significantly (1.25x), there may be more bw available. + * Max allowed value is 1023 (0x3FF). + */ +static u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; +/* But after 3 rounds w/o significant bw growth, estimate pipe is full. 
+ * Max allowed value is 7 (0x7).
+ */
+static u32 bbr_full_bw_cnt = 3;
+
+static u32 bbr_flags;		/* Debugging related stuff */
+
+/* Whether to debug using printk.
+ */
+static bool bbr_debug_with_printk;
+
+/* Whether to debug using ftrace event tcp:tcp_bbr_event.
+ * Ignored when bbr_debug_with_printk is set.
+ */
+static bool bbr_debug_ftrace;
+
+/* Experiment: each cycle, try to hold sub-unity gain until inflight <= BDP. */
+static bool bbr_drain_to_target = true;		/* default: enabled */
+
+/* Experiment: Flags to control BBR with ECN behavior.
+ */
+static bool bbr_precise_ece_ack = true;		/* default: enabled */
+
+/* The max rwin scaling shift factor is 14 (RFC 1323), so the max sane rwin is
+ * (2^(16+14) B)/(1024 B/packet) = 1M packets.
+ */
+static u32 bbr_cwnd_warn_val	= 1U << 20;
+
+static u16 bbr_debug_port_mask;
+
+/* BBR module parameters. These are module parameters only in Google prod.
+ * Upstream these are intentionally not module parameters.
+ */
+static int bbr_pacing_gain_size = CYCLE_LEN;
+
+/* Gain factor for adding extra_acked to target cwnd: */
+static int bbr_extra_acked_gain = 256;
+
+/* Window length of extra_acked window. Max allowed val is 31. */
+static u32 bbr_extra_acked_win_rtts = 5;
+
+/* Max allowed val for ack_epoch_acked, after which sampling epoch is reset */
+static u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20;
+
+/* Time period for clamping cwnd increment due to ack aggregation */
+static u32 bbr_extra_acked_max_us = 100 * 1000;
+
+/* Use extra acked in startup?
+ * 0: disabled
+ * 1: use latest extra_acked value from 1-2 rtt in startup
+ */
+static int bbr_extra_acked_in_startup = 1;	/* default: enabled */
+
+/* Experiment: don't grow cwnd beyond twice what we just probed. */
+static bool bbr_usage_based_cwnd;		/* default: disabled */
+
+/* For lab testing, researchers can enable BBRv2 ECN support with this flag,
+ * when they know that any ECN marks that the connections experience will be
+ * DCTCP/L4S-style ECN marks, rather than RFC3168 ECN marks.
+ * TODO(ncardwell): Production use of the BBRv2 ECN functionality depends on
+ * negotiation or configuration that is outside the scope of the BBRv2
+ * alpha release.
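+ * Editor's note (illustrative, not in the original patch): in such lab
+ * tests this corresponds to loading the module with the ecn_enable
+ * parameter below, e.g. "modprobe tcp_bbr2 ecn_enable=1".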
+ */
+static bool bbr_ecn_enable = false;
+
+module_param_named(min_tso_rate, bbr_min_tso_rate, int, 0644);
+module_param_named(tso_rtt_shift, bbr_tso_rtt_shift, int, 0644);
+module_param_named(high_gain, bbr_high_gain, int, 0644);
+module_param_named(drain_gain, bbr_drain_gain, int, 0644);
+module_param_named(startup_cwnd_gain, bbr_startup_cwnd_gain, int, 0644);
+module_param_named(cwnd_gain, bbr_cwnd_gain, int, 0644);
+module_param_array_named(pacing_gain, bbr_pacing_gain, int,
+			 &bbr_pacing_gain_size, 0644);
+module_param_named(cwnd_min_target, bbr_cwnd_min_target, uint, 0644);
+module_param_named(probe_rtt_cwnd_gain,
+		   bbr_probe_rtt_cwnd_gain, uint, 0664);
+module_param_named(cwnd_warn_val, bbr_cwnd_warn_val, uint, 0664);
+module_param_named(debug_port_mask, bbr_debug_port_mask, ushort, 0644);
+module_param_named(flags, bbr_flags, uint, 0644);
+module_param_named(debug_ftrace, bbr_debug_ftrace, bool, 0644);
+module_param_named(debug_with_printk, bbr_debug_with_printk, bool, 0644);
+module_param_named(min_rtt_win_sec, bbr_min_rtt_win_sec, uint, 0644);
+module_param_named(probe_rtt_mode_ms, bbr_probe_rtt_mode_ms, uint, 0644);
+module_param_named(probe_rtt_win_ms, bbr_probe_rtt_win_ms, uint, 0644);
+module_param_named(full_bw_thresh, bbr_full_bw_thresh, uint, 0644);
+module_param_named(full_bw_cnt, bbr_full_bw_cnt, uint, 0644);
+module_param_named(cwnd_tso_budget, bbr_cwnd_tso_budget, uint, 0664);
+module_param_named(extra_acked_gain, bbr_extra_acked_gain, int, 0664);
+module_param_named(extra_acked_win_rtts,
+		   bbr_extra_acked_win_rtts, uint, 0664);
+module_param_named(extra_acked_max_us,
+		   bbr_extra_acked_max_us, uint, 0664);
+module_param_named(ack_epoch_acked_reset_thresh,
+		   bbr_ack_epoch_acked_reset_thresh, uint, 0664);
+module_param_named(drain_to_target, bbr_drain_to_target, bool, 0664);
+module_param_named(precise_ece_ack, bbr_precise_ece_ack, bool, 0664);
+module_param_named(extra_acked_in_startup,
+		   bbr_extra_acked_in_startup, int, 0664);
+module_param_named(usage_based_cwnd, bbr_usage_based_cwnd, bool, 0664);
+module_param_named(ecn_enable, bbr_ecn_enable, bool, 0664);
+
+static void bbr2_exit_probe_rtt(struct sock *sk);
+static void bbr2_reset_congestion_signals(struct sock *sk);
+
+static void bbr_check_probe_rtt_done(struct sock *sk);
+
+/* Do we estimate that STARTUP filled the pipe? */
+static bool bbr_full_bw_reached(const struct sock *sk)
+{
+	const struct bbr *bbr = inet_csk_ca(sk);
+
+	return bbr->full_bw_reached;
+}
+
+/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
+static u32 bbr_max_bw(const struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return max(bbr->bw_hi[0], bbr->bw_hi[1]);
+}
+
+/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */
+static u32 bbr_bw(const struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return min(bbr_max_bw(sk), bbr->bw_lo);
+}
+
+/* Return maximum extra acked in past k-2k round trips,
+ * where k = bbr_extra_acked_win_rtts.
+ */
+static u16 bbr_extra_acked(const struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return max(bbr->extra_acked[0], bbr->extra_acked[1]);
+}
+
+/* Return rate in bytes per second, optionally with a gain.
+ * The order here is chosen carefully to avoid overflow of u64. This should
+ * work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
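+ * Editor's worked example (illustrative, not in the original patch):
+ * rate = BW_UNIT (1 pkt/usec), mss = 1500, gain = BBR_UNIT and a 1%
+ * margin yield 1500 * 10^6 * 0.99 ~= 1.485e9 bytes/sec (~11.9 Gbit/s),
+ * with every intermediate product comfortably inside a u64.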
+ */ +static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain, + int margin) +{ + unsigned int mss = tcp_sk(sk)->mss_cache; + + rate *= mss; + rate *= gain; + rate >>= BBR_SCALE; + rate *= USEC_PER_SEC / 100 * (100 - margin); + rate >>= BW_SCALE; + rate = max(rate, 1ULL); + return rate; +} + +static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate) +{ + return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0); +} + +static u64 bbr_rate_kbps(struct sock *sk, u64 rate) +{ + rate = bbr_bw_bytes_per_sec(sk, rate); + rate *= 8; + do_div(rate, 1000); + return rate; +} + +static u32 bbr_tso_segs_goal(struct sock *sk); +static void bbr_debug(struct sock *sk, u32 acked, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + static const char ca_states[] = { + [TCP_CA_Open] = 'O', + [TCP_CA_Disorder] = 'D', + [TCP_CA_CWR] = 'C', + [TCP_CA_Recovery] = 'R', + [TCP_CA_Loss] = 'L', + }; + static const char mode[] = { + 'G', /* Growing - BBR_STARTUP */ + 'D', /* Drain - BBR_DRAIN */ + 'W', /* Window - BBR_PROBE_BW */ + 'M', /* Min RTT - BBR_PROBE_RTT */ + }; + static const char ack_phase[] = { /* bbr_ack_phase strings */ + 'I', /* BBR_ACKS_INIT - 'Init' */ + 'R', /* BBR_ACKS_REFILLING - 'Refilling' */ + 'B', /* BBR_ACKS_PROBE_STARTING - 'Before' */ + 'F', /* BBR_ACKS_PROBE_FEEDBACK - 'Feedback' */ + 'A', /* BBR_ACKS_PROBE_STOPPING - 'After' */ + }; + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + const u32 una = tp->snd_una - bbr->debug.snd_isn; + const u32 fack = tcp_highest_sack_seq(tp); + const u16 dport = ntohs(inet_sk(sk)->inet_dport); + bool is_port_match = (bbr_debug_port_mask && + ((dport & bbr_debug_port_mask) == 0)); + char debugmsg[320]; + + if (sk->sk_state == TCP_SYN_SENT) + return; /* no bbr_init() yet if SYN retransmit -> CA_Loss */ + + if (!tp->snd_cwnd || tp->snd_cwnd > bbr_cwnd_warn_val) { + char addr[INET6_ADDRSTRLEN + 10] = { 0 }; + + if (sk->sk_family == AF_INET) + snprintf(addr, sizeof(addr), "%pI4:%u", + &inet_sk(sk)->inet_daddr, dport); + else if (sk->sk_family == AF_INET6) + snprintf(addr, sizeof(addr), "%pI6:%u", + &sk->sk_v6_daddr, dport); + + WARN_ONCE(1, + "BBR %s cwnd alert: %u " + "snd_una: %u ca: %d pacing_gain: %u cwnd_gain: %u " + "bw: %u rtt: %u min_rtt: %u " + "acked: %u tso_segs: %u " + "bw: %d %ld %d pif: %u\n", + addr, tp->snd_cwnd, + una, inet_csk(sk)->icsk_ca_state, + bbr->pacing_gain, bbr->cwnd_gain, + bbr_max_bw(sk), (tp->srtt_us >> 3), bbr->min_rtt_us, + acked, bbr_tso_segs_goal(sk), + rs->delivered, rs->interval_us, rs->is_retrans, + tcp_packets_in_flight(tp)); + } + + if (likely(!bbr_debug_with_printk && !bbr_debug_ftrace)) + return; + + if (!sock_flag(sk, SOCK_DBG) && !is_port_match) + return; + + if (!ctx->log && !tp->app_limited && !(bbr_flags & FLAG_DEBUG_VERBOSE)) + return; + + if (ipv4_is_loopback(inet_sk(sk)->inet_daddr) && + !(bbr_flags & FLAG_DEBUG_LOOPBACK)) + return; + + snprintf(debugmsg, sizeof(debugmsg) - 1, + "BBR %pI4:%-5u %5u,%03u:%-7u %c " + "%c %2u br %2u cr %2d rtt %5ld d %2d i %5ld mrtt %d %cbw %llu " + "bw %llu lb %llu ib %llu qb %llu " + "a %u if %2u %c %c dl %u l %u al %u # %u t %u %c %c " + "lr %d er %d ea %d bwl %lld il %d ih %d c %d " + "v %d %c %u %c %s\n", + &inet_sk(sk)->inet_daddr, dport, + una / 1000, una % 1000, fack - tp->snd_una, + ca_states[inet_csk(sk)->icsk_ca_state], + bbr->debug.undo ? 
'@' : mode[bbr->mode], + tp->snd_cwnd, + bbr_extra_acked(sk), /* br (legacy): extra_acked */ + rs->tx_in_flight, /* cr (legacy): tx_inflight */ + rs->rtt_us, + rs->delivered, + rs->interval_us, + bbr->min_rtt_us, + rs->is_app_limited ? '_' : 'l', + bbr_rate_kbps(sk, ctx->sample_bw), /* lbw: latest sample bw */ + bbr_rate_kbps(sk, bbr_max_bw(sk)), /* bw: max bw */ + 0ULL, /* lb: [obsolete] */ + 0ULL, /* ib: [obsolete] */ + (u64)sk->sk_pacing_rate * 8 / 1000, + acked, + tcp_packets_in_flight(tp), + rs->is_ack_delayed ? 'd' : '.', + bbr->round_start ? '*' : '.', + tp->delivered, tp->lost, + tp->app_limited, + 0, /* #: [obsolete] */ + ctx->target_cwnd, + tp->reord_seen ? 'r' : '.', /* r: reordering seen? */ + ca_states[bbr->prev_ca_state], + (rs->lost + rs->delivered) > 0 ? + (1000 * rs->lost / + (rs->lost + rs->delivered)) : 0, /* lr: loss rate x1000 */ + (rs->delivered) > 0 ? + (1000 * rs->delivered_ce / + (rs->delivered)) : 0, /* er: ECN rate x1000 */ + 1000 * bbr->ecn_alpha >> BBR_SCALE, /* ea: ECN alpha x1000 */ + bbr->bw_lo == ~0U ? + -1 : (s64)bbr_rate_kbps(sk, bbr->bw_lo), /* bwl */ + bbr->inflight_lo, /* il */ + bbr->inflight_hi, /* ih */ + bbr->bw_probe_up_cnt, /* c */ + 2, /* v: version */ + bbr->debug.event, + bbr->cycle_idx, + ack_phase[bbr->ack_phase], + bbr->bw_probe_samples ? "Y" : "N"); + debugmsg[sizeof(debugmsg) - 1] = 0; + + /* printk takes a higher precedence. */ + if (bbr_debug_with_printk) + printk(KERN_DEBUG "%s", debugmsg); + + if (unlikely(bbr->debug.undo)) + bbr->debug.undo = 0; +} + +/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ +static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + u64 rate = bw; + + rate = bbr_rate_bytes_per_sec(sk, rate, gain, + bbr_pacing_margin_percent); + rate = min_t(u64, rate, sk->sk_max_pacing_rate); + return rate; +} + +/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ +static void bbr_init_pacing_rate_from_rtt(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + u32 rtt_us; + + if (tp->srtt_us) { /* any RTT sample yet? */ + rtt_us = max(tp->srtt_us >> 3, 1U); + bbr->has_seen_rtt = 1; + } else { /* no RTT sample yet */ + rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ + } + bw = (u64)tp->snd_cwnd * BW_UNIT; + do_div(bw, rtt_us); + sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr->params.high_gain); +} + +/* Pace using current bw estimate and a gain factor. */ +static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + unsigned long rate = bbr_bw_to_pacing_rate(sk, bw, gain); + + if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) + bbr_init_pacing_rate_from_rtt(sk); + if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) + sk->sk_pacing_rate = rate; +} + +static u32 bbr_min_tso_segs(struct sock *sk) +{ + return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; +} + +/* Return the number of segments BBR would like in a TSO/GSO skb, given + * a particular max gso size as a constraint. + */ +static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, + u32 gso_max_size) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 segs, r; + u64 bytes; + + /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ + bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; + + /* Budget a TSO/GSO burst size allowance based on min_rtt. For every + * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst. 
+ * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K) + */ + if (bbr->params.tso_rtt_shift) { + r = bbr->min_rtt_us >> bbr->params.tso_rtt_shift; + if (r < BITS_PER_TYPE(u32)) /* prevent undefined behavior */ + bytes += GSO_MAX_SIZE >> r; + } + + bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk)); + return segs; +} + +/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ +static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) +{ + return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); +} + +/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ +static u32 bbr_tso_segs_goal(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE); +} + +/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ +static void bbr_save_cwnd(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT) + bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */ + else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */ + bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd); +} + +static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (event == CA_EVENT_TX_START && tp->app_limited) { + bbr->idle_restart = 1; + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; + /* Avoid pointless buffer overflows: pace at est. bw if we don't + * need more speed (we're restarting from idle and app-limited). + */ + if (bbr->mode == BBR_PROBE_BW) + bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); + else if (bbr->mode == BBR_PROBE_RTT) + bbr_check_probe_rtt_done(sk); + } else if ((event == CA_EVENT_ECN_IS_CE || + event == CA_EVENT_ECN_NO_CE) && + bbr_ecn_enable && + bbr->params.precise_ece_ack) { + u32 state = bbr->ce_state; + dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state); + bbr->ce_state = state; + if (tp->fast_ack_mode == 2 && event == CA_EVENT_ECN_IS_CE) + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); + } +} + +/* Calculate bdp based on min RTT and the estimated bottleneck bandwidth: + * + * bdp = ceil(bw * min_rtt * gain) + * + * The key factor, gain, controls the amount of queue. While a small gain + * builds a smaller queue, it becomes more vulnerable to noise in RTT + * measurements (e.g., delayed ACKs or other ACK compression effects). This + * noise may cause BBR to under-estimate the rate. + */ +static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bdp; + u64 w; + + /* If we've never had a valid RTT sample, cap cwnd at the initial + * default. This should only happen when the connection is not using TCP + * timestamps and has retransmitted all of the SYN/SYNACK/data packets + * ACKed so far. In this case, an RTO can cut cwnd to 1, in which + * case we need to slow-start up toward something safe: initial cwnd. + */ + if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ + return bbr->init_cwnd; /* be safe: cap at initial cwnd */ + + w = (u64)bw * bbr->min_rtt_us; + + /* Apply a gain to the given value, remove the BW_SCALE shift, and + * round the value up to avoid a negative feedback loop. 
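+ * Editor's worked example (illustrative, not in the original patch):
+ * bw = BW_UNIT (1 pkt/usec) and min_rtt_us = 10000 give w = 10000 << 24;
+ * with gain = BBR_UNIT the rounded-up result is a BDP of 10000 packets.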
+ */ + bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + + return bdp; +} + +/* To achieve full performance in high-speed paths, we budget enough cwnd to + * fit full-sized skbs in-flight on both end hosts to fully utilize the path: + * - one skb in sending host Qdisc, + * - one skb in sending host TSO/GSO engine + * - one skb being received by receiver host LRO/GRO/delayed-ACK engine + * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because + * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * which allows 2 outstanding 2-packet sequences, to try to keep pipe + * full even with ACK-every-other-packet delayed ACKs. + */ +static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 tso_segs_goal; + + tso_segs_goal = 3 * bbr_tso_segs_goal(sk); + + /* Allow enough full-sized skbs in flight to utilize end systems. */ + if (bbr->params.cwnd_tso_budget == 1) { + cwnd = max_t(u32, cwnd, tso_segs_goal); + cwnd = max_t(u32, cwnd, bbr->params.cwnd_min_target); + } else { + cwnd += tso_segs_goal; + cwnd = (cwnd + 1) & ~1U; + } + /* Ensure gain cycling gets inflight above BDP even for small BDPs. */ + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) + cwnd += 2; + + return cwnd; +} + +/* Find inflight based on min RTT and the estimated bottleneck bandwidth. */ +static u32 bbr_inflight(struct sock *sk, u32 bw, int gain) +{ + u32 inflight; + + inflight = bbr_bdp(sk, bw, gain); + inflight = bbr_quantization_budget(sk, inflight); + + return inflight; +} + +/* With pacing at lower layers, there's often less data "in the network" than + * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq), + * we often have several skbs queued in the pacing layer with a pre-scheduled + * earliest departure time (EDT). BBR adapts its pacing rate based on the + * inflight level that it estimates has already been "baked in" by previous + * departure time decisions. We calculate a rough estimate of the number of our + * packets that might be in the network at the earliest departure time for the + * next skb scheduled: + * in_network_at_edt = inflight_at_edt - (EDT - now) * bw + * If we're increasing inflight, then we want to know if the transmit of the + * EDT skb will push inflight above the target, so inflight_at_edt includes + * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight, + * then estimate if inflight will sink too low just before the EDT transmit. 
+ */ +static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 now_ns, edt_ns, interval_us; + u32 interval_delivered, inflight_at_edt; + + now_ns = tp->tcp_clock_cache; + edt_ns = max(tp->tcp_wstamp_ns, now_ns); + interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC); + interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE; + inflight_at_edt = inflight_now; + if (bbr->pacing_gain > BBR_UNIT) /* increasing inflight */ + inflight_at_edt += bbr_tso_segs_goal(sk); /* include EDT skb */ + if (interval_delivered >= inflight_at_edt) + return 0; + return inflight_at_edt - interval_delivered; +} + +/* Find the cwnd increment based on estimate of ack aggregation */ +static u32 bbr_ack_aggregation_cwnd(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 max_aggr_cwnd, aggr_cwnd = 0; + + if (bbr->params.extra_acked_gain && + (bbr_full_bw_reached(sk) || bbr->params.extra_acked_in_startup)) { + max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) + / BW_UNIT; + aggr_cwnd = (bbr->params.extra_acked_gain * bbr_extra_acked(sk)) + >> BBR_SCALE; + aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); + } + + return aggr_cwnd; +} + +/* Returns the cwnd for PROBE_RTT mode. */ +static u32 bbr_probe_rtt_cwnd(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->params.probe_rtt_cwnd_gain == 0) + return bbr->params.cwnd_min_target; + return max_t(u32, bbr->params.cwnd_min_target, + bbr_bdp(sk, bbr_bw(sk), bbr->params.probe_rtt_cwnd_gain)); +} + +/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss + * has drawn us down below target), or snap down to target if we're above it. + */ +static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, + u32 acked, u32 bw, int gain, u32 cwnd, + struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 target_cwnd = 0, prev_cwnd = tp->snd_cwnd, max_probe; + + if (!acked) + goto done; /* no packet fully ACKed; just apply caps */ + + target_cwnd = bbr_bdp(sk, bw, gain); + + /* Increment the cwnd to account for excess ACKed data that seems + * due to aggregation (of data and/or ACKs) visible in the ACK stream. + */ + target_cwnd += bbr_ack_aggregation_cwnd(sk); + target_cwnd = bbr_quantization_budget(sk, target_cwnd); + + /* If we're below target cwnd, slow start cwnd toward target cwnd. */ + bbr->debug.target_cwnd = target_cwnd; + + /* Update cwnd and enable fast path if cwnd reaches target_cwnd. */ + bbr->try_fast_path = 0; + if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */ + cwnd += acked; + if (cwnd >= target_cwnd) { + cwnd = target_cwnd; + bbr->try_fast_path = 1; + } + } else if (cwnd < target_cwnd || cwnd < 2 * bbr->init_cwnd) { + cwnd += acked; + } else { + bbr->try_fast_path = 1; + } + + /* When growing cwnd, don't grow beyond twice what we just probed. 
*/ + if (bbr->params.usage_based_cwnd) { + max_probe = max(2 * tp->max_packets_out, tp->snd_cwnd); + cwnd = min(cwnd, max_probe); + } + + cwnd = max_t(u32, cwnd, bbr->params.cwnd_min_target); +done: + tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */ + if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ + tp->snd_cwnd = min_t(u32, tp->snd_cwnd, bbr_probe_rtt_cwnd(sk)); + + ctx->target_cwnd = target_cwnd; + ctx->log = (tp->snd_cwnd != prev_cwnd); +} + +/* See if we have reached next round trip */ +static void bbr_update_round_start(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->round_start = 0; + + /* See if we've reached the next RTT */ + if (rs->interval_us > 0 && + !before(rs->prior_delivered, bbr->next_rtt_delivered)) { + bbr->next_rtt_delivered = tp->delivered; + bbr->round_start = 1; + } +} + +/* Calculate the bandwidth based on how fast packets are delivered */ +static void bbr_calculate_bw_sample(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u64 bw = 0; + + /* Divide delivered by the interval to find a (lower bound) bottleneck + * bandwidth sample. Delivered is in packets and interval_us in uS and + * ratio will be <<1 for most connections. So delivered is first scaled. + * Round up to allow growth at low rates, even with integer division. + */ + if (rs->interval_us > 0) { + if (WARN_ONCE(rs->delivered < 0, + "negative delivered: %d interval_us: %ld\n", + rs->delivered, rs->interval_us)) + return; + + bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us); + } + + ctx->sample_bw = bw; + bbr->debug.rs_bw = bw; +} + +/* Estimates the windowed max degree of ack aggregation. + * This is used to provision extra in-flight data to keep sending during + * inter-ACK silences. + * + * Degree of ack aggregation is estimated as extra data acked beyond expected. + * + * max_extra_acked = "maximum recent excess data ACKed beyond max_bw * interval" + * cwnd += max_extra_acked + * + * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). + * Max filter is an approximate sliding window of 5-10 (packet timed) round + * trips for non-startup phase, and 1-2 round trips for startup. + */ +static void bbr_update_ack_aggregation(struct sock *sk, + const struct rate_sample *rs) +{ + u32 epoch_us, expected_acked, extra_acked; + struct bbr *bbr = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 extra_acked_win_rtts_thresh = bbr->params.extra_acked_win_rtts; + + if (!bbr->params.extra_acked_gain || rs->acked_sacked <= 0 || + rs->delivered < 0 || rs->interval_us <= 0) + return; + + if (bbr->round_start) { + bbr->extra_acked_win_rtts = min(0x1F, + bbr->extra_acked_win_rtts + 1); + if (bbr->params.extra_acked_in_startup && + !bbr_full_bw_reached(sk)) + extra_acked_win_rtts_thresh = 1; + if (bbr->extra_acked_win_rtts >= + extra_acked_win_rtts_thresh) { + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? + 0 : 1; + bbr->extra_acked[bbr->extra_acked_win_idx] = 0; + } + } + + /* Compute how many packets we expected to be delivered over epoch. */ + epoch_us = tcp_stamp_us_delta(tp->delivered_mstamp, + bbr->ack_epoch_mstamp); + expected_acked = ((u64)bbr_bw(sk) * epoch_us) / BW_UNIT; + + /* Reset the aggregation epoch if ACK rate is below expected rate or + * significantly large no. 
of ack received since epoch (potentially + * quite old epoch). + */ + if (bbr->ack_epoch_acked <= expected_acked || + (bbr->ack_epoch_acked + rs->acked_sacked >= + bbr_ack_epoch_acked_reset_thresh)) { + bbr->ack_epoch_acked = 0; + bbr->ack_epoch_mstamp = tp->delivered_mstamp; + expected_acked = 0; + } + + /* Compute excess data delivered, beyond what was expected. */ + bbr->ack_epoch_acked = min_t(u32, 0xFFFFF, + bbr->ack_epoch_acked + rs->acked_sacked); + extra_acked = bbr->ack_epoch_acked - expected_acked; + extra_acked = min(extra_acked, tp->snd_cwnd); + if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx]) + bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; +} + +/* Estimate when the pipe is full, using the change in delivery rate: BBR + * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by + * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited + * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the + * higher rwin, 3: we get higher delivery rate samples. Or transient + * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar + * design goal, but uses delay and inter-ACK spacing instead of bandwidth. + */ +static void bbr_check_full_bw_reached(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw_thresh; + + if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) + return; + + bw_thresh = (u64)bbr->full_bw * bbr->params.full_bw_thresh >> BBR_SCALE; + if (bbr_max_bw(sk) >= bw_thresh) { + bbr->full_bw = bbr_max_bw(sk); + bbr->full_bw_cnt = 0; + return; + } + ++bbr->full_bw_cnt; + bbr->full_bw_reached = bbr->full_bw_cnt >= bbr->params.full_bw_cnt; +} + +/* If pipe is probably full, drain the queue and then enter steady-state. */ +static bool bbr_check_drain(struct sock *sk, const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { + bbr->mode = BBR_DRAIN; /* drain queue we created */ + tcp_sk(sk)->snd_ssthresh = + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); + bbr2_reset_congestion_signals(sk); + } /* fall through to check if in-flight is already small: */ + if (bbr->mode == BBR_DRAIN && + bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) + return true; /* exiting DRAIN now */ + return false; +} + +static void bbr_check_probe_rtt_done(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (!(bbr->probe_rtt_done_stamp && + after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) + return; + + bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */ + tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + bbr2_exit_probe_rtt(sk); +} + +/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and + * periodically drain the bottleneck queue, to converge to measure the true + * min_rtt (unloaded propagation delay). This allows the flows to keep queues + * small (reducing queuing delay and packet loss) and achieve fairness among + * BBR flows. + * + * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires, + * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets. + * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed + * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and + * re-enter the previous mode. 
BBR uses 200ms to approximately bound the + * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s). + * + * Note that flows need only pay 2% if they are busy sending over the last 10 + * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have + * natural silences or low-rate periods within 10 seconds where the rate is low + * enough for long enough to drain its queue in the bottleneck. We pick up + * these min RTT measurements opportunistically with our min_rtt filter. :-) + */ +static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool probe_rtt_expired, min_rtt_expired; + u32 expire; + + /* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */ + expire = bbr->probe_rtt_min_stamp + + msecs_to_jiffies(bbr->params.probe_rtt_win_ms); + probe_rtt_expired = after(tcp_jiffies32, expire); + if (rs->rtt_us >= 0 && + (rs->rtt_us <= bbr->probe_rtt_min_us || + (probe_rtt_expired && !rs->is_ack_delayed))) { + bbr->probe_rtt_min_us = rs->rtt_us; + bbr->probe_rtt_min_stamp = tcp_jiffies32; + } + /* Track min RTT seen in the min_rtt_win_sec filter window: */ + expire = bbr->min_rtt_stamp + bbr->params.min_rtt_win_sec * HZ; + min_rtt_expired = after(tcp_jiffies32, expire); + if (bbr->probe_rtt_min_us <= bbr->min_rtt_us || + min_rtt_expired) { + bbr->min_rtt_us = bbr->probe_rtt_min_us; + bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp; + } + + if (bbr->params.probe_rtt_mode_ms > 0 && probe_rtt_expired && + !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { + bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ + bbr_save_cwnd(sk); /* note cwnd so we can restore it */ + bbr->probe_rtt_done_stamp = 0; + bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; + bbr->next_rtt_delivered = tp->delivered; + } + + if (bbr->mode == BBR_PROBE_RTT) { + /* Ignore low rate samples during this mode. */ + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; + /* Maintain min packets in flight for max(200 ms, 1 round). 
*/ + if (!bbr->probe_rtt_done_stamp && + tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) { + bbr->probe_rtt_done_stamp = tcp_jiffies32 + + msecs_to_jiffies(bbr->params.probe_rtt_mode_ms); + bbr->probe_rtt_round_done = 0; + bbr->next_rtt_delivered = tp->delivered; + } else if (bbr->probe_rtt_done_stamp) { + if (bbr->round_start) + bbr->probe_rtt_round_done = 1; + if (bbr->probe_rtt_round_done) + bbr_check_probe_rtt_done(sk); + } + } + /* Restart after idle ends only once we process a new S/ACK for data */ + if (rs->delivered > 0) + bbr->idle_restart = 0; +} + +static void bbr_update_gains(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + switch (bbr->mode) { + case BBR_STARTUP: + bbr->pacing_gain = bbr->params.high_gain; + bbr->cwnd_gain = bbr->params.startup_cwnd_gain; + break; + case BBR_DRAIN: + bbr->pacing_gain = bbr->params.drain_gain; /* slow, to drain */ + bbr->cwnd_gain = bbr->params.startup_cwnd_gain; /* keep cwnd */ + break; + case BBR_PROBE_BW: + bbr->pacing_gain = bbr->params.pacing_gain[bbr->cycle_idx]; + bbr->cwnd_gain = bbr->params.cwnd_gain; + break; + case BBR_PROBE_RTT: + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = BBR_UNIT; + break; + default: + WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode); + break; + } +} + +static void bbr_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + int i; + + WARN_ON_ONCE(tp->snd_cwnd >= bbr_cwnd_warn_val); + + bbr->initialized = 1; + bbr->params.high_gain = min(0x7FF, bbr_high_gain); + bbr->params.drain_gain = min(0x3FF, bbr_drain_gain); + bbr->params.startup_cwnd_gain = min(0x7FF, bbr_startup_cwnd_gain); + bbr->params.cwnd_gain = min(0x7FF, bbr_cwnd_gain); + bbr->params.cwnd_tso_budget = min(0x1U, bbr_cwnd_tso_budget); + bbr->params.cwnd_min_target = min(0xFU, bbr_cwnd_min_target); + bbr->params.min_rtt_win_sec = min(0x1FU, bbr_min_rtt_win_sec); + bbr->params.probe_rtt_mode_ms = min(0x1FFU, bbr_probe_rtt_mode_ms); + bbr->params.full_bw_cnt = min(0x7U, bbr_full_bw_cnt); + bbr->params.full_bw_thresh = min(0x3FFU, bbr_full_bw_thresh); + bbr->params.extra_acked_gain = min(0x7FF, bbr_extra_acked_gain); + bbr->params.extra_acked_win_rtts = min(0x1FU, bbr_extra_acked_win_rtts); + bbr->params.drain_to_target = bbr_drain_to_target ? 1 : 0; + bbr->params.precise_ece_ack = bbr_precise_ece_ack ? 1 : 0; + bbr->params.extra_acked_in_startup = bbr_extra_acked_in_startup ? 1 : 0; + bbr->params.probe_rtt_cwnd_gain = min(0xFFU, bbr_probe_rtt_cwnd_gain); + bbr->params.probe_rtt_win_ms = + min(0x3FFFU, + min_t(u32, bbr_probe_rtt_win_ms, + bbr->params.min_rtt_win_sec * MSEC_PER_SEC)); + for (i = 0; i < CYCLE_LEN; i++) + bbr->params.pacing_gain[i] = min(0x3FF, bbr_pacing_gain[i]); + bbr->params.usage_based_cwnd = bbr_usage_based_cwnd ? 
1 : 0; + bbr->params.tso_rtt_shift = min(0xFU, bbr_tso_rtt_shift); + + bbr->debug.snd_isn = tp->snd_una; + bbr->debug.target_cwnd = 0; + bbr->debug.undo = 0; + + bbr->init_cwnd = min(0x7FU, tp->snd_cwnd); + bbr->prior_cwnd = tp->prior_cwnd; + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + bbr->next_rtt_delivered = 0; + bbr->prev_ca_state = TCP_CA_Open; + bbr->packet_conservation = 0; + + bbr->probe_rtt_done_stamp = 0; + bbr->probe_rtt_round_done = 0; + bbr->probe_rtt_min_us = tcp_min_rtt(tp); + bbr->probe_rtt_min_stamp = tcp_jiffies32; + bbr->min_rtt_us = tcp_min_rtt(tp); + bbr->min_rtt_stamp = tcp_jiffies32; + + bbr->has_seen_rtt = 0; + bbr_init_pacing_rate_from_rtt(sk); + + bbr->round_start = 0; + bbr->idle_restart = 0; + bbr->full_bw_reached = 0; + bbr->full_bw = 0; + bbr->full_bw_cnt = 0; + bbr->cycle_mstamp = 0; + bbr->cycle_idx = 0; + bbr->mode = BBR_STARTUP; + bbr->debug.rs_bw = 0; + + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = 0; + bbr->extra_acked[0] = 0; + bbr->extra_acked[1] = 0; + + bbr->ce_state = 0; + bbr->prior_rcv_nxt = tp->rcv_nxt; + bbr->try_fast_path = 0; + + cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); +} + +static u32 bbr_sndbuf_expand(struct sock *sk) +{ + /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ + return 3; +} + +/* __________________________________________________________________________ + * + * Functions new to BBR v2 ("bbr") congestion control are below here. + * __________________________________________________________________________ + */ + +/* Incorporate a new bw sample into the current window of our max filter. */ +static void bbr2_take_bw_hi_sample(struct sock *sk, u32 bw) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]); +} + +/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */ +static void bbr2_advance_bw_hi_filter(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (!bbr->bw_hi[1]) + return; /* no samples in this window; remember old window */ + bbr->bw_hi[0] = bbr->bw_hi[1]; + bbr->bw_hi[1] = 0; +} + +/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */ +static u32 bbr2_target_inflight(struct sock *sk) +{ + u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT); + + return min(bdp, tcp_sk(sk)->snd_cwnd); +} + +static bool bbr2_is_probing_bandwidth(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return (bbr->mode == BBR_STARTUP) || + (bbr->mode == BBR_PROBE_BW && + (bbr->cycle_idx == BBR_BW_PROBE_REFILL || + bbr->cycle_idx == BBR_BW_PROBE_UP)); +} + +/* Has the given amount of time elapsed since we marked the phase start? */ +static bool bbr2_has_elapsed_in_phase(const struct sock *sk, u32 interval_us) +{ + const struct tcp_sock *tp = tcp_sk(sk); + const struct bbr *bbr = inet_csk_ca(sk); + + return tcp_stamp_us_delta(tp->tcp_mstamp, + bbr->cycle_mstamp + interval_us) > 0; +} + +static void bbr2_handle_queue_too_high_in_startup(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->full_bw_reached = 1; + bbr->inflight_hi = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); +} + +/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. 
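+ * Editor's note (illustrative, not in the original patch): ce_ratio and
+ * ecn_thresh are BBR_SCALE fixed-point fractions, so e.g. ecn_thresh =
+ * 128 means STARTUP exits after full_ecn_cnt rounds with >= 50% CE marks.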
*/
+static void bbr2_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible ||
+	    !bbr->params.full_ecn_cnt || !bbr->params.ecn_thresh)
+		return;
+
+	if (ce_ratio >= bbr->params.ecn_thresh)
+		bbr->startup_ecn_rounds++;
+	else
+		bbr->startup_ecn_rounds = 0;
+
+	if (bbr->startup_ecn_rounds >= bbr->params.full_ecn_cnt) {
+		bbr->debug.event = 'E'; /* ECN caused STARTUP exit */
+		bbr2_handle_queue_too_high_in_startup(sk);
+		return;
+	}
+}
+
+static void bbr2_update_ecn_alpha(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	s32 delivered, delivered_ce;
+	u64 alpha, ce_ratio;
+	u32 gain;
+
+	if (bbr->params.ecn_factor == 0)
+		return;
+
+	delivered = tp->delivered - bbr->alpha_last_delivered;
+	delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce;
+
+	if (delivered == 0 ||		/* avoid divide by zero */
+	    WARN_ON_ONCE(delivered < 0 || delivered_ce < 0))  /* backwards? */
+		return;
+
+	/* See if we should use ECN sender logic for this connection. */
+	if (!bbr->ecn_eligible && bbr_ecn_enable &&
+	    (bbr->min_rtt_us <= bbr->params.ecn_max_rtt_us ||
+	     !bbr->params.ecn_max_rtt_us))
+		bbr->ecn_eligible = 1;
+
+	ce_ratio = (u64)delivered_ce << BBR_SCALE;
+	do_div(ce_ratio, delivered);
+	gain = bbr->params.ecn_alpha_gain;
+	alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE;
+	alpha += (gain * ce_ratio) >> BBR_SCALE;
+	bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT);
+
+	bbr->alpha_last_delivered = tp->delivered;
+	bbr->alpha_last_delivered_ce = tp->delivered_ce;
+
+	bbr2_check_ecn_too_high_in_startup(sk, ce_ratio);
+}
+
+/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */
+static void bbr2_raise_inflight_hi_slope(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 growth_this_round, cnt;
+
+	/* Calculate "slope": packets S/Acked per inflight_hi increment. */
+	growth_this_round = 1 << bbr->bw_probe_up_rounds;
+	bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30);
+	cnt = tp->snd_cwnd / growth_this_round;
+	cnt = max(cnt, 1U);
+	bbr->bw_probe_up_cnt = cnt;
+	bbr->debug.event = 'G';  /* Grow inflight_hi slope */
+}
+
+/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. */
+static void bbr2_probe_inflight_hi_upward(struct sock *sk,
+					  const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 delta;
+
+	if (!tp->is_cwnd_limited || tp->snd_cwnd < bbr->inflight_hi) {
+		bbr->bw_probe_up_acks = 0;  /* don't accumulate unused credits */
+		return;  /* not fully using inflight_hi, so don't grow it */
+	}
+
+	/* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. */
+	bbr->bw_probe_up_acks += rs->acked_sacked;
+	if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) {
+		delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt;
+		bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt;
+		bbr->inflight_hi += delta;
+		bbr->debug.event = 'I';  /* Increment inflight_hi */
+	}
+
+	if (bbr->round_start)
+		bbr2_raise_inflight_hi_slope(sk);
+}
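For reference, bbr2_update_ecn_alpha() above is a standard exponentially weighted moving average computed in BBR's 8-bit fixed point (BBR_SCALE = 8, so BBR_UNIT = 256 represents 1.0). The standalone sketch below (illustrative only, not part of the patch) mirrors that arithmetic and can be compiled with any C compiler to sanity-check gain choices:

/* Sketch of bbr2_update_ecn_alpha()'s fixed-point EWMA; illustrative only.
 * Build: cc -o ecn_alpha ecn_alpha.c
 */
#include <stdio.h>
#include <stdint.h>

#define BBR_SCALE 8			/* scale for fractions, e.g. gains */
#define BBR_UNIT (1 << BBR_SCALE)	/* 256 represents 1.0 */

/* alpha' = (1 - gain) * alpha + gain * ce_ratio, all in BBR_UNIT units */
static uint32_t ecn_alpha_update(uint32_t alpha, uint32_t gain,
				 uint32_t delivered, uint32_t delivered_ce)
{
	uint64_t ce_ratio = ((uint64_t)delivered_ce << BBR_SCALE) / delivered;
	uint64_t next = ((uint64_t)(BBR_UNIT - gain) * alpha) >> BBR_SCALE;

	next += (gain * ce_ratio) >> BBR_SCALE;
	return next > BBR_UNIT ? BBR_UNIT : (uint32_t)next;
}

int main(void)
{
	uint32_t alpha = BBR_UNIT;		/* ecn_alpha_init: 1.0 */
	uint32_t gain = BBR_UNIT * 1 / 16;	/* ecn_alpha_gain: 6.25% */
	int round;

	/* Ten round trips at a steady 50% CE mark rate: alpha decays from
	 * 1.0 toward the observed ratio of 0.5, ~6% of the gap per round.
	 */
	for (round = 0; round < 10; round++) {
		alpha = ecn_alpha_update(alpha, gain, 100, 50);
		printf("round %2d: alpha = %3u/256 (~%.3f)\n",
		       round, alpha, alpha / 256.0);
	}
	return 0;
}

With the default gain of 1/16, alpha moves roughly 6% of the remaining distance toward the observed CE ratio on each round trip.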
+/* Does loss/ECN rate for this sample say inflight is "too high"?
+ * This is used by both the bbr_check_loss_too_high_in_startup() function,
+ * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which
+ * uses it to notice when loss/ECN rates suggest inflight is too high.
+ */
+static bool bbr2_is_inflight_too_high(const struct sock *sk,
+				      const struct rate_sample *rs)
+{
+	const struct bbr *bbr = inet_csk_ca(sk);
+	u32 loss_thresh, ecn_thresh;
+
+	if (rs->lost > 0 && rs->tx_in_flight) {
+		loss_thresh = (u64)rs->tx_in_flight * bbr->params.loss_thresh >>
+			      BBR_SCALE;
+		if (rs->lost > loss_thresh)
+			return true;
+	}
+
+	if (rs->delivered_ce > 0 && rs->delivered > 0 &&
+	    bbr->ecn_eligible && bbr->params.ecn_thresh) {
+		ecn_thresh = (u64)rs->delivered * bbr->params.ecn_thresh >>
+			     BBR_SCALE;
+		if (rs->delivered_ce >= ecn_thresh)
+			return true;
+	}
+
+	return false;
+}
+
+/* Calculate the tx_in_flight level that corresponded to excessive loss.
+ * We find "lost_prefix" segs of the skb where loss rate went too high,
+ * by solving for "lost_prefix" in the following equation:
+ *   lost                     /  inflight                     >= loss_thresh
+ *  (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh
+ * Then we take that equation, convert it to fixed point, and
+ * round up to the nearest packet.
+ */
+static u32 bbr2_inflight_hi_from_lost_skb(const struct sock *sk,
+					  const struct rate_sample *rs,
+					  const struct sk_buff *skb)
+{
+	const struct bbr *bbr = inet_csk_ca(sk);
+	u32 loss_thresh = bbr->params.loss_thresh;
+	u32 pcount, divisor, inflight_hi;
+	s32 inflight_prev, lost_prev;
+	u64 loss_budget, lost_prefix;
+
+	pcount = tcp_skb_pcount(skb);
+
+	/* How much data was in flight before this skb? */
+	inflight_prev = rs->tx_in_flight - pcount;
+	if (WARN_ONCE(inflight_prev < 0,
+		      "tx_in_flight: %u pcount: %u reneg: %u",
+		      rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg))
+		return ~0U;
+
+	/* How much inflight data was marked lost before this skb? */
+	lost_prev = rs->lost - pcount;
+	if (WARN_ON_ONCE(lost_prev < 0))
+		return ~0U;
+
+	/* At what prefix of this lost skb did loss rate exceed loss_thresh? */
+	loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1;
+	loss_budget >>= BBR_SCALE;
+	if (lost_prev >= loss_budget) {
+		lost_prefix = 0;	/* previous losses crossed loss_thresh */
+	} else {
+		lost_prefix = loss_budget - lost_prev;
+		lost_prefix <<= BBR_SCALE;
+		divisor = BBR_UNIT - loss_thresh;
+		if (WARN_ON_ONCE(!divisor))  /* loss_thresh is 8 bits */
+			return ~0U;
+		do_div(lost_prefix, divisor);
+	}
+
+	inflight_hi = inflight_prev + lost_prefix;
+	return inflight_hi;
+}
+
+/* If loss/ECN rates during probing indicated we may have overfilled a
+ * buffer, return an operating point that tries to leave unutilized headroom in
+ * the path for other flows, for fairness convergence and lower RTTs and loss.
+ */
+static u32 bbr2_inflight_with_headroom(const struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 headroom, headroom_fraction;
+
+	if (bbr->inflight_hi == ~0U)
+		return ~0U;
+
+	headroom_fraction = bbr->params.inflight_headroom;
+	headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE;
+	headroom = max(headroom, 1U);
+	return max_t(s32, bbr->inflight_hi - headroom,
+		     bbr->params.cwnd_min_target);
+}
+
+/* Bound cwnd to a sensible level, based on our current probing state
+ * machine phase and model of a good inflight level (inflight_lo, inflight_hi).
+ */
+static void bbr2_bound_cwnd_for_inflight_model(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 cap;
+
+	/* tcp_rcv_synsent_state_process() currently calls tcp_ack()
+	 * and thus cong_control() without first initializing us(!).
+ */ + if (!bbr->initialized) + return; + + cap = ~0U; + if (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx != BBR_BW_PROBE_CRUISE) { + /* Probe to see if more packets fit in the path. */ + cap = bbr->inflight_hi; + } else { + if (bbr->mode == BBR_PROBE_RTT || + (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx == BBR_BW_PROBE_CRUISE)) + cap = bbr2_inflight_with_headroom(sk); + } + /* Adapt to any loss/ECN since our last bw probe. */ + cap = min(cap, bbr->inflight_lo); + + cap = max_t(u32, cap, bbr->params.cwnd_min_target); + tp->snd_cwnd = min(cap, tp->snd_cwnd); +} + +/* Estimate a short-term lower bound on the capacity available now, based + * on measurements of the current delivery process and recent history. When we + * are seeing loss/ECN at times when we are not probing bw, then conservatively + * move toward flow balance by multiplicatively cutting our short-term + * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a + * multiplicative decrease in order to converge to a lower capacity in time + * logarithmic in the magnitude of the decrease. + * + * However, we do not cut our short-term estimates lower than the current rate + * and volume of delivered data from this round trip, since from the current + * delivery process we can estimate the measured capacity available now. + * + * Anything faster than that approach would knowingly risk high loss, which can + * cause low bw for Reno/CUBIC and high loss recovery latency for + * request/response flows using any congestion control. + */ +static void bbr2_adapt_lower_bounds(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 ecn_cut, ecn_inflight_lo, beta; + + /* We only use lower-bound estimates when not probing bw. + * When probing we need to push inflight higher to probe bw. + */ + if (bbr2_is_probing_bandwidth(sk)) + return; + + /* ECN response. */ + if (bbr->ecn_in_round && bbr->ecn_eligible && bbr->params.ecn_factor) { + /* Reduce inflight to (1 - alpha*ecn_factor). */ + ecn_cut = (BBR_UNIT - + ((bbr->ecn_alpha * bbr->params.ecn_factor) >> + BBR_SCALE)); + if (bbr->inflight_lo == ~0U) + bbr->inflight_lo = tp->snd_cwnd; + ecn_inflight_lo = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE; + } else { + ecn_inflight_lo = ~0U; + } + + /* Loss response. */ + if (bbr->loss_in_round) { + /* Reduce bw and inflight to (1 - beta). */ + if (bbr->bw_lo == ~0U) + bbr->bw_lo = bbr_max_bw(sk); + if (bbr->inflight_lo == ~0U) + bbr->inflight_lo = tp->snd_cwnd; + beta = bbr->params.beta; + bbr->bw_lo = + max_t(u32, bbr->bw_latest, + (u64)bbr->bw_lo * + (BBR_UNIT - beta) >> BBR_SCALE); + bbr->inflight_lo = + max_t(u32, bbr->inflight_latest, + (u64)bbr->inflight_lo * + (BBR_UNIT - beta) >> BBR_SCALE); + } + + /* Adjust to the lower of the levels implied by loss or ECN. */ + bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo); +} + +/* Reset any short-term lower-bound adaptation to congestion, so that we can + * push our inflight up. + */ +static void bbr2_reset_lower_bounds(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->bw_lo = ~0U; + bbr->inflight_lo = ~0U; +} + +/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state + * machine phase where we adapt our lower bound based on congestion signals. 
+ */ +static void bbr2_reset_congestion_signals(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->loss_in_round = 0; + bbr->ecn_in_round = 0; + bbr->loss_in_cycle = 0; + bbr->ecn_in_cycle = 0; + bbr->bw_latest = 0; + bbr->inflight_latest = 0; +} + +/* Update (most of) our congestion signals: track the recent rate and volume of + * delivered data, presence of loss, and EWMA degree of ECN marking. + */ +static void bbr2_update_congestion_signals( + struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + bbr->loss_round_start = 0; + if (rs->interval_us <= 0 || !rs->acked_sacked) + return; /* Not a valid observation */ + bw = ctx->sample_bw; + + if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) + bbr2_take_bw_hi_sample(sk, bw); + + bbr->loss_in_round |= (rs->losses > 0); + + /* Update rate and volume of delivered data from latest round trip: */ + bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw); + bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered); + + if (before(rs->prior_delivered, bbr->loss_round_delivered)) + return; /* skip the per-round-trip updates */ + /* Now do per-round-trip updates. */ + bbr->loss_round_delivered = tp->delivered; /* mark round trip */ + bbr->loss_round_start = 1; + bbr2_adapt_lower_bounds(sk); + + /* Update windowed "latest" (single-round-trip) filters. */ + bbr->loss_in_round = 0; + bbr->ecn_in_round = 0; + bbr->bw_latest = ctx->sample_bw; + bbr->inflight_latest = rs->delivered; +} + +/* Bandwidth probing can cause loss. To help coexistence with loss-based + * congestion control we spread out our probing in a Reno-conscious way. Due to + * the shape of the Reno sawtooth, the time required between loss epochs for an + * idealized Reno flow is a number of round trips that is the BDP of that + * flow. We count packet-timed round trips directly, since measured RTT can + * vary widely, and Reno is driven by packet-timed round trips. + */ +static bool bbr2_is_reno_coexistence_probe_time(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 inflight, rounds, reno_gain, reno_rounds; + + /* Random loss can shave some small percentage off of our inflight + * in each round. To survive this, flows need robust periodic probes. + */ + rounds = bbr->params.bw_probe_max_rounds; + + reno_gain = bbr->params.bw_probe_reno_gain; + if (reno_gain) { + inflight = bbr2_target_inflight(sk); + reno_rounds = ((u64)inflight * reno_gain) >> BBR_SCALE; + rounds = min(rounds, reno_rounds); + } + return bbr->rounds_since_probe >= rounds; +} + +/* How long do we want to wait before probing for bandwidth (and risking + * loss)? We randomize the wait, for better mixing and fairness convergence. + * + * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips. + * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow, + * (eg 4K video to a broadband user): + * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets + * + * We bound the BBR-native inter-bw-probe wall clock time to be: + * (a) higher than 2 sec: to try to avoid causing loss for a long enough time + * to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must + * be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs + * (b) lower than 3 sec: to ensure flows can start probing in a reasonable + * amount of time to discover unutilized bw on human-scale interactive + * time-scales (e.g. 
perhaps traffic from a web page download that we + * were competing with is now complete). + */ +static void bbr2_pick_probe_wait(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* Decide the random round-trip bound for wait until probe: */ + bbr->rounds_since_probe = + prandom_u32_max(bbr->params.bw_probe_rand_rounds); + /* Decide the random wall clock bound for wait until probe: */ + bbr->probe_wait_us = bbr->params.bw_probe_base_us + + prandom_u32_max(bbr->params.bw_probe_rand_us); +} + +static void bbr2_set_cycle_idx(struct sock *sk, int cycle_idx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->cycle_idx = cycle_idx; + /* New phase, so need to update cwnd and pacing rate. */ + bbr->try_fast_path = 0; +} + +/* Send at estimated bw to fill the pipe, but not queue. We need this phase + * before PROBE_UP, because as soon as we send faster than the available bw + * we will start building a queue, and if the buffer is shallow we can cause + * loss. If we do not fill the pipe before we cause this loss, our bw_hi and + * inflight_hi estimates will underestimate. + */ +static void bbr2_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr2_reset_lower_bounds(sk); + if (bbr->inflight_hi != ~0U) + bbr->inflight_hi += bbr->params.refill_add_inc; + bbr->bw_probe_up_rounds = bw_probe_up_rounds; + bbr->bw_probe_up_acks = 0; + bbr->stopped_risky_probe = 0; + bbr->ack_phase = BBR_ACKS_REFILLING; + bbr->next_rtt_delivered = tp->delivered; + bbr2_set_cycle_idx(sk, BBR_BW_PROBE_REFILL); +} + +/* Now probe max deliverable data rate and volume. */ +static void bbr2_start_bw_probe_up(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->ack_phase = BBR_ACKS_PROBE_STARTING; + bbr->next_rtt_delivered = tp->delivered; + bbr->cycle_mstamp = tp->tcp_mstamp; + bbr2_set_cycle_idx(sk, BBR_BW_PROBE_UP); + bbr2_raise_inflight_hi_slope(sk); +} + +/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall + * clock time at which to probe beyond an inflight that we think to be + * safe. This will knowingly risk packet loss, so we want to do this rarely, to + * keep packet loss rates low. Also start a round-trip counter, to probe faster + * if we estimate a Reno flow at our BDP would probe faster. + */ +static void bbr2_start_bw_probe_down(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr2_reset_congestion_signals(sk); + bbr->bw_probe_up_cnt = ~0U; /* not growing inflight_hi any more */ + bbr2_pick_probe_wait(sk); + bbr->cycle_mstamp = tp->tcp_mstamp; /* start wall clock */ + bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; + bbr->next_rtt_delivered = tp->delivered; + bbr2_set_cycle_idx(sk, BBR_BW_PROBE_DOWN); +} + +/* Cruise: maintain what we estimate to be a neutral, conservative + * operating point, without attempting to probe up for bandwidth or down for + * RTT, and only reducing inflight in response to loss/ECN signals. + */ +static void bbr2_start_bw_probe_cruise(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->inflight_lo != ~0U) + bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi); + + bbr2_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE); +} + +/* Loss and/or ECN rate is too high while probing. + * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle. 
+ */ +static void bbr2_handle_inflight_too_high(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + const u32 beta = bbr->params.beta; + + bbr->prev_probe_too_high = 1; + bbr->bw_probe_samples = 0; /* only react once per probe */ + bbr->debug.event = 'L'; /* Loss/ECN too high */ + /* If we are app-limited then we are not robustly + * probing the max volume of inflight data we think + * might be safe (analogous to how app-limited bw + * samples are not known to be robustly probing bw). + */ + if (!rs->is_app_limited) + bbr->inflight_hi = max_t(u32, rs->tx_in_flight, + (u64)bbr2_target_inflight(sk) * + (BBR_UNIT - beta) >> BBR_SCALE); + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr2_start_bw_probe_down(sk); +} + +/* If we're seeing bw and loss samples reflecting our bw probing, adapt + * using the signals we see. If loss or ECN mark rate gets too high, then adapt + * inflight_hi downward. If we're able to push inflight higher without such + * signals, push higher: adapt inflight_hi upward. + */ +static bool bbr2_adapt_upper_bounds(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* Track when we'll see bw/loss samples resulting from our bw probes. */ + if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start) + bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK; + if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) { + /* End of samples from bw probing phase. */ + bbr->bw_probe_samples = 0; + bbr->ack_phase = BBR_ACKS_INIT; + /* At this point in the cycle, our current bw sample is also + * our best recent chance at finding the highest available bw + * for this flow. So now is the best time to forget the bw + * samples from the previous cycle, by advancing the window. + */ + if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited) + bbr2_advance_bw_hi_filter(sk); + /* If we had an inflight_hi, then probed and pushed inflight all + * the way up to hit that inflight_hi without seeing any + * high loss/ECN in all the resulting ACKs from that probing, + * then probe up again, this time letting inflight persist at + * inflight_hi for a round trip, then accelerating beyond. + */ + if (bbr->mode == BBR_PROBE_BW && + bbr->stopped_risky_probe && !bbr->prev_probe_too_high) { + bbr->debug.event = 'R'; /* reprobe */ + bbr2_start_bw_probe_refill(sk, 0); + return true; /* yes, decided state transition */ + } + } + + if (bbr2_is_inflight_too_high(sk, rs)) { + if (bbr->bw_probe_samples) /* sample is from bw probing? */ + bbr2_handle_inflight_too_high(sk, rs); + } else { + /* Loss/ECN rate is declared safe. Adjust upper bound upward. */ + if (bbr->inflight_hi == ~0U) /* no excess queue signals yet? */ + return false; + + /* To be resilient to random loss, we must raise inflight_hi + * if we observe in any phase that a higher level is safe. + */ + if (rs->tx_in_flight > bbr->inflight_hi) { + bbr->inflight_hi = rs->tx_in_flight; + bbr->debug.event = 'U'; /* raise up inflight_hi */ + } + + if (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr2_probe_inflight_hi_upward(sk, rs); + } + + return false; +} + +/* Check if it's time to probe for bandwidth now, and if so, kick it off. 
*/ +static bool bbr2_check_time_to_probe_bw(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 n; + + /* If we seem to be at an operating point where we are not seeing loss + * but we are seeing ECN marks, then when the ECN marks cease we reprobe + * quickly (in case a burst of cross-traffic has ceased and freed up bw, + * or in case we are sharing with multiplicatively probing traffic). + */ + if (bbr->params.ecn_reprobe_gain && bbr->ecn_eligible && + bbr->ecn_in_cycle && !bbr->loss_in_cycle && + inet_csk(sk)->icsk_ca_state == TCP_CA_Open) { + bbr->debug.event = 'A'; /* *A*ll clear to probe *A*gain */ + /* Calculate n so that when bbr2_raise_inflight_hi_slope() + * computes growth_this_round as 2^n it will be roughly the + * desired volume of data (inflight_hi*ecn_reprobe_gain). + */ + n = ilog2((((u64)bbr->inflight_hi * + bbr->params.ecn_reprobe_gain) >> BBR_SCALE)); + bbr2_start_bw_probe_refill(sk, n); + return true; + } + + if (bbr2_has_elapsed_in_phase(sk, bbr->probe_wait_us) || + bbr2_is_reno_coexistence_probe_time(sk)) { + bbr2_start_bw_probe_refill(sk, 0); + return true; + } + return false; +} + +/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE? */ +static bool bbr2_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw) +{ + struct bbr *bbr = inet_csk_ca(sk); + bool is_under_bdp, is_long_enough; + + /* Always need to pull inflight down to leave headroom in queue. */ + if (inflight > bbr2_inflight_with_headroom(sk)) + return false; + + is_under_bdp = inflight <= bbr_inflight(sk, bw, BBR_UNIT); + if (bbr->params.drain_to_target) + return is_under_bdp; + + is_long_enough = bbr2_has_elapsed_in_phase(sk, bbr->min_rtt_us); + return is_under_bdp || is_long_enough; +} + +/* PROBE_BW state machine: cruise, refill, probe for bw, or drain? */ +static void bbr2_update_cycle_phase(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + bool is_risky = false, is_queuing = false; + u32 inflight, bw; + + if (!bbr_full_bw_reached(sk)) + return; + + /* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */ + if (bbr2_adapt_upper_bounds(sk, rs)) + return; /* already decided state transition */ + + if (bbr->mode != BBR_PROBE_BW) + return; + + inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); + bw = bbr_max_bw(sk); + + switch (bbr->cycle_idx) { + /* First we spend most of our time cruising with a pacing_gain of 1.0, + * which paces at the estimated bw, to try to fully use the pipe + * without building queue. If we encounter loss/ECN marks, we adapt + * by slowing down. + */ + case BBR_BW_PROBE_CRUISE: + if (bbr2_check_time_to_probe_bw(sk)) + return; /* already decided state transition */ + break; + + /* After cruising, when it's time to probe, we first "refill": we send + * at the estimated bw to fill the pipe, before probing higher and + * knowingly risking overflowing the bottleneck buffer (causing loss). + */ + case BBR_BW_PROBE_REFILL: + if (bbr->round_start) { + /* After one full round trip of sending in REFILL, we + * start to see bw samples reflecting our REFILL, which + * may be putting too much data in flight. + */ + bbr->bw_probe_samples = 1; + bbr2_start_bw_probe_up(sk); + } + break; + + /* After we refill the pipe, we probe by using a pacing_gain > 1.0, to + * probe for bw. If we have not seen loss/ECN, we try to raise inflight + * to at least pacing_gain*BDP; note that this may take more than + * min_rtt if min_rtt is small (e.g. on a LAN). 
+	 *
+	 * We terminate PROBE_UP bandwidth probing upon any of the following:
+	 *
+	 * (1) We've pushed inflight up to hit the inflight_hi target set in the
+	 *     most recent previous bw probe phase. Thus we want to start
+	 *     draining the queue immediately because it's very likely the most
+	 *     recently sent packets will fill the queue and cause drops.
+	 *     (checked here)
+	 * (2) We have probed for at least 1*min_rtt_us, and the
+	 *     estimated queue is high enough (inflight > 1.25 * estimated_bdp).
+	 *     (checked here)
+	 * (3) Loss filter says loss rate is "too high".
+	 *     (checked in bbr_is_inflight_too_high())
+	 * (4) ECN filter says ECN mark rate is "too high".
+	 *     (checked in bbr_is_inflight_too_high())
+	 */
+	case BBR_BW_PROBE_UP:
+		if (bbr->prev_probe_too_high &&
+		    inflight >= bbr->inflight_hi) {
+			bbr->stopped_risky_probe = 1;
+			is_risky = true;
+			bbr->debug.event = 'D';  /* D for danger */
+		} else if (bbr2_has_elapsed_in_phase(sk, bbr->min_rtt_us) &&
+			   inflight >=
+			   bbr_inflight(sk, bw,
+					bbr->params.bw_probe_pif_gain)) {
+			is_queuing = true;
+			bbr->debug.event = 'Q';  /* building Queue */
+		}
+		if (is_risky || is_queuing) {
+			bbr->prev_probe_too_high = 0;  /* no loss/ECN (yet) */
+			bbr2_start_bw_probe_down(sk);  /* restart w/ down */
+		}
+		break;
+
+	/* After probing in PROBE_UP, we have usually accumulated some data in
+	 * the bottleneck buffer (if bw probing didn't find more bw). We next
+	 * enter PROBE_DOWN to try to drain any excess data from the queue. To
+	 * do this, we use a pacing_gain < 1.0. We hold this pacing gain until
+	 * our inflight is less than that target cruising point, which is the
+	 * minimum of (a) the amount needed to leave headroom, and (b) the
+	 * estimated BDP. Once inflight falls to match the target, we estimate
+	 * the queue is drained; persisting would underutilize the pipe.
+	 */
+	case BBR_BW_PROBE_DOWN:
+		if (bbr2_check_time_to_probe_bw(sk))
+			return;		/* already decided state transition */
+		if (bbr2_check_time_to_cruise(sk, inflight, bw))
+			bbr2_start_bw_probe_cruise(sk);
+		break;
+
+	default:
+		WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx);
+	}
+}
+
+/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. */
+static void bbr2_exit_probe_rtt(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr2_reset_lower_bounds(sk);
+	if (bbr_full_bw_reached(sk)) {
+		bbr->mode = BBR_PROBE_BW;
+		/* Raising inflight after PROBE_RTT may cause loss, so reset
+		 * the PROBE_BW clock and schedule the next bandwidth probe for
+		 * a friendly and randomized future point in time.
+		 */
+		bbr2_start_bw_probe_down(sk);
+		/* Since we are exiting PROBE_RTT, we know inflight is
+		 * below our estimated BDP, so it is reasonable to cruise.
+		 */
+		bbr2_start_bw_probe_cruise(sk);
+	} else {
+		bbr->mode = BBR_STARTUP;
+	}
+}
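The "building Queue" test in BBR_BW_PROBE_UP above compares inflight against bbr_inflight(sk, bw, bw_probe_pif_gain), i.e. the estimated BDP scaled by 1.25 in BBR's fixed point. A minimal sketch of that arithmetic (illustrative only, not part of the patch; bdp_from_rate() is an invented helper):

/* Sketch of the PROBE_UP "is_queuing" test: condition (2) fires once
 * inflight reaches bdp * bw_probe_pif_gain (1.25 in fixed point).
 * Illustrative only; bdp_from_rate() is an invented helper.
 */
#include <stdio.h>
#include <stdint.h>

#define BBR_SCALE 8
#define BBR_UNIT (1 << BBR_SCALE)

/* Estimated BDP in packets for rate (bits/sec), RTT (usec), MSS (bytes) */
static uint32_t bdp_from_rate(uint64_t rate_bps, uint32_t rtt_us, uint32_t mss)
{
	return (uint32_t)(rate_bps / 8 * rtt_us / 1000000 / mss);
}

int main(void)
{
	uint32_t gain = BBR_UNIT * 5 / 4;	/* bw_probe_pif_gain: 1.25x */
	uint32_t bdp, target, inflight;

	/* 25 Mbit/s at 30 ms RTT with 1514-byte frames: BDP ~= 61 packets,
	 * so PROBE_UP keeps pushing until inflight reaches ~76 packets.
	 */
	bdp = bdp_from_rate(25 * 1000 * 1000ULL, 30 * 1000, 1514);
	target = (uint32_t)(((uint64_t)bdp * gain) >> BBR_SCALE);

	printf("bdp=%u target=%u\n", bdp, target);
	for (inflight = 60; inflight <= 80; inflight += 10)
		printf("inflight=%u -> is_queuing=%d\n",
		       inflight, inflight >= target);
	return 0;
}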
+
+/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. Wait until
+ * the end of the round in recovery to get a good estimate of how many packets
+ * have been lost, and how many we need to drain with a low pacing rate.
+ */
+static void bbr2_check_loss_too_high_in_startup(struct sock *sk,
+						const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (bbr_full_bw_reached(sk))
+		return;
+
+	/* For STARTUP exit, check the loss rate at the end of each round trip
+	 * of Recovery episodes in STARTUP. We check the loss rate at the end
+	 * of the round trip to filter out noisy/low loss and have a better
+	 * sense of inflight (extent of loss), so we can drain more accurately.
+	 */
+	if (rs->losses && bbr->loss_events_in_round < 0xf)
+		bbr->loss_events_in_round++;  /* update saturating counter */
+	if (bbr->params.full_loss_cnt && bbr->loss_round_start &&
+	    inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery &&
+	    bbr->loss_events_in_round >= bbr->params.full_loss_cnt &&
+	    bbr2_is_inflight_too_high(sk, rs)) {
+		bbr->debug.event = 'P';  /* Packet loss caused STARTUP exit */
+		bbr2_handle_queue_too_high_in_startup(sk);
+		return;
+	}
+	if (bbr->loss_round_start)
+		bbr->loss_events_in_round = 0;
+}
+
+/* If we are done draining, advance into steady state operation in PROBE_BW. */
+static void bbr2_check_drain(struct sock *sk, const struct rate_sample *rs,
+			     struct bbr_context *ctx)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (bbr_check_drain(sk, rs, ctx)) {
+		bbr->mode = BBR_PROBE_BW;
+		bbr2_start_bw_probe_down(sk);
+	}
+}
+
+static void bbr2_update_model(struct sock *sk, const struct rate_sample *rs,
+			      struct bbr_context *ctx)
+{
+	bbr2_update_congestion_signals(sk, rs, ctx);
+	bbr_update_ack_aggregation(sk, rs);
+	bbr2_check_loss_too_high_in_startup(sk, rs);
+	bbr_check_full_bw_reached(sk, rs);
+	bbr2_check_drain(sk, rs, ctx);
+	bbr2_update_cycle_phase(sk, rs);
+	bbr_update_min_rtt(sk, rs);
+}
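Putting bbr2_check_loss_too_high_in_startup() together with the 2% loss_thresh used by bbr2_is_inflight_too_high(): STARTUP ends once a Recovery round has accumulated at least full_loss_cnt loss events and the round's losses exceed loss_thresh of the packets in flight. A standalone sketch of that decision (illustrative only, not part of the patch; the struct and helper are invented):

/* Sketch of the STARTUP loss-exit decision: leave STARTUP once a Recovery
 * round has seen >= full_loss_cnt loss events AND round losses exceed
 * loss_thresh (2%) of the packets in flight. Illustrative only.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define BBR_SCALE 8
#define BBR_UNIT (1 << BBR_SCALE)

struct round_sample {
	uint32_t tx_in_flight;	/* packets in flight when losses were marked */
	uint32_t lost;		/* packets lost in this round */
	uint32_t loss_events;	/* distinct loss (gap) events in this round */
};

static bool exit_startup_on_loss(const struct round_sample *s,
				 uint32_t full_loss_cnt, uint32_t loss_thresh)
{
	uint32_t budget = (uint32_t)(((uint64_t)s->tx_in_flight * loss_thresh)
				     >> BBR_SCALE);

	return s->loss_events >= full_loss_cnt && s->lost > budget;
}

int main(void)
{
	struct round_sample s = {
		.tx_in_flight = 100, .lost = 3, .loss_events = 8,
	};

	/* loss_thresh = 5/256 (~2%), so the budget for 100 packets in
	 * flight is 1 packet: 3 losses across 8 loss events ends STARTUP.
	 */
	printf("exit STARTUP: %d\n",
	       exit_startup_on_loss(&s, 8, BBR_UNIT * 2 / 100));
	return 0;
}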
+
+/* Fast path for app-limited case.
+ *
+ * On each ack, we execute bbr state machine, which primarily consists of:
+ * 1) update model based on new rate sample, and
+ * 2) update control based on updated model or state change.
+ *
+ * There are certain workload/scenarios, e.g. app-limited case, where
+ * either we can skip updating model or we can skip update of both model
+ * as well as control. This provides significant softirq cpu savings for
+ * processing incoming acks.
+ *
+ * In case of app-limited, if there is no congestion (loss/ecn) and
+ * if observed bw sample is less than current estimated bw, then we can
+ * skip some of the computation in bbr state processing:
+ *
+ * - if there is no rtt/mode/phase change: In this case, since all the
+ *   parameters of the network model are constant, we can skip model
+ *   as well as control update.
+ *
+ * - else we can skip rest of the model update. But we still need to
+ *   update the control to account for the new rtt/mode/phase.
+ *
+ * Returns whether we can take fast path or not.
+ */
+static bool bbr2_fast_path(struct sock *sk, bool *update_model,
+			   const struct rate_sample *rs, struct bbr_context *ctx)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 prev_min_rtt_us, prev_mode;
+
+	if (bbr->params.fast_path && bbr->try_fast_path &&
+	    rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) &&
+	    !bbr->loss_in_round && !bbr->ecn_in_round) {
+		prev_mode = bbr->mode;
+		prev_min_rtt_us = bbr->min_rtt_us;
+		bbr2_check_drain(sk, rs, ctx);
+		bbr2_update_cycle_phase(sk, rs);
+		bbr_update_min_rtt(sk, rs);
+
+		if (bbr->mode == prev_mode &&
+		    bbr->min_rtt_us == prev_min_rtt_us &&
+		    bbr->try_fast_path)
+			return true;
+
+		/* Skip model update, but control still needs to be updated */
+		*update_model = false;
+	}
+	return false;
+}
+
+static void bbr2_main(struct sock *sk, const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	struct bbr_context ctx = { 0 };
+	bool update_model = true;
+	u32 bw;
+
+	bbr->debug.event = '.';  /* init to default NOP (no event yet) */
+
+	bbr_update_round_start(sk, rs, &ctx);
+	if (bbr->round_start) {
+		bbr->rounds_since_probe =
+			min_t(s32, bbr->rounds_since_probe + 1, 0xFF);
+		bbr2_update_ecn_alpha(sk);
+	}
+
+	bbr->ecn_in_round |= rs->is_ece;
+	bbr_calculate_bw_sample(sk, rs, &ctx);
+
+	if (bbr2_fast_path(sk, &update_model, rs, &ctx))
+		goto out;
+
+	if (update_model)
+		bbr2_update_model(sk, rs, &ctx);
+
+	bbr_update_gains(sk);
+	bw = bbr_bw(sk);
+	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
+	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain,
+		     tp->snd_cwnd, &ctx);
+	bbr2_bound_cwnd_for_inflight_model(sk);
+
+out:
+	bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state;
+	bbr->loss_in_cycle |= rs->lost > 0;
+	bbr->ecn_in_cycle |= rs->delivered_ce > 0;
+
+	bbr_debug(sk, rs->acked_sacked, rs, &ctx);
+}
+
+/* Module parameters that are settable by TCP_CONGESTION_PARAMS are declared
+ * down here, so that the algorithm functions that use the parameters must use
+ * the per-socket parameters; if they accidentally use the global version
+ * then there will be a compile error.
+ * TODO(ncardwell): move all per-socket parameters down to this section.
+ */
+
+/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE.
+ * No loss response when 0. Max allowed value is 255.
+ */
+static u32 bbr_beta = BBR_UNIT * 30 / 100;
+
+/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE.
+ * Max allowed value is 255.
+ */
+static u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16;  /* 1/16 = 6.25% */
+
+/* The initial value for the ecn_alpha state variable. Default and max
+ * BBR_UNIT (256), representing 1.0. This allows a flow to respond quickly
+ * to congestion if the bottleneck is congested when the flow starts up.
+ */
+static u32 bbr_ecn_alpha_init = BBR_UNIT;	/* 1.0, to respond quickly */
+
+/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE.
+ * No ECN based bounding when 0. Max allowed value is 255.
+ */
+static u32 bbr_ecn_factor = BBR_UNIT * 1 / 3;	/* 1/3 = 33% */
+
+/* Estimate bw probing has gone too far if CE ratio exceeds this threshold.
+ * Scaled by BBR_SCALE. Disabled when 0. Max allowed is 255.
+ */
+static u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2;  /* 1/2 = 50% */
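All of these knobs are fractions scaled by BBR_UNIT, so integer truncation makes the stored value land slightly below the nominal percentage. A small sketch (illustrative only, not part of the patch) that prints the effective values:

/* Sketch showing the effective fractions behind the BBR_UNIT-scaled
 * parameters above; illustrative only.
 */
#include <stdio.h>

#define BBR_SCALE 8
#define BBR_UNIT (1 << BBR_SCALE)

int main(void)
{
	static const struct { const char *name; unsigned int val; } params[] = {
		{ "beta",              BBR_UNIT * 30 / 100 },
		{ "ecn_alpha_gain",    BBR_UNIT * 1 / 16 },
		{ "ecn_factor",        BBR_UNIT * 1 / 3 },
		{ "ecn_thresh",        BBR_UNIT * 1 / 2 },
		{ "loss_thresh",       BBR_UNIT * 2 / 100 },
		{ "inflight_headroom", BBR_UNIT * 15 / 100 },
	};
	unsigned int i;

	/* Integer truncation stores slightly less than the nominal
	 * fraction, e.g. beta is 76/256 ~= 0.297 rather than 0.300.
	 */
	for (i = 0; i < sizeof(params) / sizeof(params[0]); i++)
		printf("%-18s = %3u/256 (~%.3f)\n", params[i].name,
		       params[i].val, params[i].val / 256.0);
	return 0;
}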
+/* Max RTT (in usec) at which to use sender-side ECN logic.
+ * Disabled when 0 (ECN allowed at any RTT).
+ * Max allowed for the parameter is 524287 (0x7ffff) us, ~524 ms.
+ */
+static u32 bbr_ecn_max_rtt_us = 5000;
+
+/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN
+ * clears then use a multiplicative increase to quickly reprobe bw by
+ * starting inflight probing at the given multiple of inflight_hi.
+ * Default for this experimental knob is 0 (disabled).
+ * Planned value for experiments: BBR_UNIT * 1 / 2 = 128, representing 0.5.
+ */
+static u32 bbr_ecn_reprobe_gain;
+
+/* Estimate bw probing has gone too far if loss rate exceeds this level. */
+static u32 bbr_loss_thresh = BBR_UNIT * 2 / 100;  /* 2% loss */
+
+/* Exit STARTUP if number of loss marking events in a Recovery round is >= N,
+ * and loss rate is higher than bbr_loss_thresh.
+ * Disabled if 0. Max allowed value is 15 (0xF).
+ */
+static u32 bbr_full_loss_cnt = 8;
+
+/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh
+ * meets this count. Max allowed value is 3.
+ */
+static u32 bbr_full_ecn_cnt = 2;
+
+/* Fraction of unutilized headroom to try to leave in path upon high loss. */
+static u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100;
+
+/* Multiplier to get target inflight (as multiple of BDP) for PROBE_UP phase.
+ * Default is 1.25x, as in BBR v1. Max allowed is 511.
+ */
+static u32 bbr_bw_probe_pif_gain = BBR_UNIT * 5 / 4;
+
+/* Multiplier to get Reno-style probe epoch duration as: k * BDP round trips.
+ * If zero, disables this BBR v2 Reno-style BDP-scaled coexistence mechanism.
+ * Max allowed is 511.
+ */
+static u32 bbr_bw_probe_reno_gain = BBR_UNIT;
+
+/* Max number of packet-timed rounds to wait before probing for bandwidth. If
+ * we want to tolerate 1% random loss per round, and not have this cut our
+ * inflight too much, we must probe for bw periodically on roughly this scale.
+ * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance.
+ * We aim to be fair with Reno/CUBIC up to a BDP of at least:
+ *  BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets
+ */
+static u32 bbr_bw_probe_max_rounds = 63;
+
+/* Max amount of randomness to inject in round counting for Reno-coexistence.
+ * Max value is 15.
+ */
+static u32 bbr_bw_probe_rand_rounds = 2;
+
+/* Use BBR-native probe time scale starting at this many usec.
+ * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least:
+ *  BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs
+ */
+static u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC;  /* 2 secs */
+
+/* Use BBR-native probes spread over this many usec: */
+static u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC;  /* 1 sec */
+
+/* Undo the model changes made in loss recovery if recovery was spurious? */
+static bool bbr_undo = true;
+
+/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */
+static bool bbr_fast_path = true;	/* default: enabled */
+
+/* Use fast ack mode? */
+static int bbr_fast_ack_mode = 1;	/* default: rwnd check off */
+
+/* How much to additively increase inflight_hi when entering REFILL?
*/ +static u32 bbr_refill_add_inc; /* default: disabled */ + +module_param_named(beta, bbr_beta, uint, 0644); +module_param_named(ecn_alpha_gain, bbr_ecn_alpha_gain, uint, 0644); +module_param_named(ecn_alpha_init, bbr_ecn_alpha_init, uint, 0644); +module_param_named(ecn_factor, bbr_ecn_factor, uint, 0644); +module_param_named(ecn_thresh, bbr_ecn_thresh, uint, 0644); +module_param_named(ecn_max_rtt_us, bbr_ecn_max_rtt_us, uint, 0644); +module_param_named(ecn_reprobe_gain, bbr_ecn_reprobe_gain, uint, 0644); +module_param_named(loss_thresh, bbr_loss_thresh, uint, 0664); +module_param_named(full_loss_cnt, bbr_full_loss_cnt, uint, 0664); +module_param_named(full_ecn_cnt, bbr_full_ecn_cnt, uint, 0664); +module_param_named(inflight_headroom, bbr_inflight_headroom, uint, 0664); +module_param_named(bw_probe_pif_gain, bbr_bw_probe_pif_gain, uint, 0664); +module_param_named(bw_probe_reno_gain, bbr_bw_probe_reno_gain, uint, 0664); +module_param_named(bw_probe_max_rounds, bbr_bw_probe_max_rounds, uint, 0664); +module_param_named(bw_probe_rand_rounds, bbr_bw_probe_rand_rounds, uint, 0664); +module_param_named(bw_probe_base_us, bbr_bw_probe_base_us, uint, 0664); +module_param_named(bw_probe_rand_us, bbr_bw_probe_rand_us, uint, 0664); +module_param_named(undo, bbr_undo, bool, 0664); +module_param_named(fast_path, bbr_fast_path, bool, 0664); +module_param_named(fast_ack_mode, bbr_fast_ack_mode, uint, 0664); +module_param_named(refill_add_inc, bbr_refill_add_inc, uint, 0664); + +static void bbr2_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr_init(sk); /* run shared init code for v1 and v2 */ + + /* BBR v2 parameters: */ + bbr->params.beta = min_t(u32, 0xFFU, bbr_beta); + bbr->params.ecn_alpha_gain = min_t(u32, 0xFFU, bbr_ecn_alpha_gain); + bbr->params.ecn_alpha_init = min_t(u32, BBR_UNIT, bbr_ecn_alpha_init); + bbr->params.ecn_factor = min_t(u32, 0xFFU, bbr_ecn_factor); + bbr->params.ecn_thresh = min_t(u32, 0xFFU, bbr_ecn_thresh); + bbr->params.ecn_max_rtt_us = min_t(u32, 0x7ffffU, bbr_ecn_max_rtt_us); + bbr->params.ecn_reprobe_gain = min_t(u32, 0x1FF, bbr_ecn_reprobe_gain); + bbr->params.loss_thresh = min_t(u32, 0xFFU, bbr_loss_thresh); + bbr->params.full_loss_cnt = min_t(u32, 0xFU, bbr_full_loss_cnt); + bbr->params.full_ecn_cnt = min_t(u32, 0x3U, bbr_full_ecn_cnt); + bbr->params.inflight_headroom = + min_t(u32, 0xFFU, bbr_inflight_headroom); + bbr->params.bw_probe_pif_gain = + min_t(u32, 0x1FFU, bbr_bw_probe_pif_gain); + bbr->params.bw_probe_reno_gain = + min_t(u32, 0x1FFU, bbr_bw_probe_reno_gain); + bbr->params.bw_probe_max_rounds = + min_t(u32, 0xFFU, bbr_bw_probe_max_rounds); + bbr->params.bw_probe_rand_rounds = + min_t(u32, 0xFU, bbr_bw_probe_rand_rounds); + bbr->params.bw_probe_base_us = + min_t(u32, (1 << 26) - 1, bbr_bw_probe_base_us); + bbr->params.bw_probe_rand_us = + min_t(u32, (1 << 26) - 1, bbr_bw_probe_rand_us); + bbr->params.undo = bbr_undo; + bbr->params.fast_path = bbr_fast_path ? 
1 : 0; + bbr->params.refill_add_inc = min_t(u32, 0x3U, bbr_refill_add_inc); + + /* BBR v2 state: */ + bbr->initialized = 1; + /* Start sampling ECN mark rate after first full flight is ACKed: */ + bbr->loss_round_delivered = tp->delivered + 1; + bbr->loss_round_start = 0; + bbr->undo_bw_lo = 0; + bbr->undo_inflight_lo = 0; + bbr->undo_inflight_hi = 0; + bbr->loss_events_in_round = 0; + bbr->startup_ecn_rounds = 0; + bbr2_reset_congestion_signals(sk); + bbr->bw_lo = ~0U; + bbr->bw_hi[0] = 0; + bbr->bw_hi[1] = 0; + bbr->inflight_lo = ~0U; + bbr->inflight_hi = ~0U; + bbr->bw_probe_up_cnt = ~0U; + bbr->bw_probe_up_acks = 0; + bbr->bw_probe_up_rounds = 0; + bbr->probe_wait_us = 0; + bbr->stopped_risky_probe = 0; + bbr->ack_phase = BBR_ACKS_INIT; + bbr->rounds_since_probe = 0; + bbr->bw_probe_samples = 0; + bbr->prev_probe_too_high = 0; + bbr->ecn_eligible = 0; + bbr->ecn_alpha = bbr->params.ecn_alpha_init; + bbr->alpha_last_delivered = 0; + bbr->alpha_last_delivered_ce = 0; + + tp->fast_ack_mode = min_t(u32, 0x2U, bbr_fast_ack_mode); + + if ((tp->ecn_flags & TCP_ECN_OK) && bbr_ecn_enable) + tp->ecn_flags |= TCP_ECN_ECT_PERMANENT; +} + +/* Core TCP stack informs us that the given skb was just marked lost. */ +static void bbr2_skb_marked_lost(struct sock *sk, const struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + struct rate_sample rs; + + /* Capture "current" data over the full round trip of loss, + * to have a better chance to see the full capacity of the path. + */ + if (!bbr->loss_in_round) /* first loss in this round trip? */ + bbr->loss_round_delivered = tp->delivered; /* set round trip */ + bbr->loss_in_round = 1; + bbr->loss_in_cycle = 1; + + if (!bbr->bw_probe_samples) + return; /* not an skb sent while probing for bandwidth */ + if (unlikely(!scb->tx.delivered_mstamp)) + return; /* skb was SACKed, reneged, marked lost; ignore it */ + /* We are probing for bandwidth. Construct a rate sample that + * estimates what happened in the flight leading up to this lost skb, + * then see if the loss rate went too high, and if so at which packet. + */ + memset(&rs, 0, sizeof(rs)); + rs.tx_in_flight = scb->tx.in_flight; + rs.lost = tp->lost - scb->tx.lost; + rs.is_app_limited = scb->tx.is_app_limited; + if (bbr2_is_inflight_too_high(sk, &rs)) { + rs.tx_in_flight = bbr2_inflight_hi_from_lost_skb(sk, &rs, skb); + bbr2_handle_inflight_too_high(sk, &rs); + } +} + +/* Revert short-term model if current loss recovery event was spurious. */ +static u32 bbr2_undo_cwnd(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->debug.undo = 1; + bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ + bbr->full_bw_cnt = 0; + bbr->loss_in_round = 0; + + if (!bbr->params.undo) + return tp->snd_cwnd; + + /* Revert to cwnd and other state saved before loss episode. */ + bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo); + bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo); + bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi); + return bbr->prior_cwnd; +} + +/* Entering loss recovery, so save state for when we undo recovery. */ +static u32 bbr2_ssthresh(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr_save_cwnd(sk); + /* For undo, save state that adapts based on loss signal. 
*/ + bbr->undo_bw_lo = bbr->bw_lo; + bbr->undo_inflight_lo = bbr->inflight_lo; + bbr->undo_inflight_hi = bbr->inflight_hi; + return tcp_sk(sk)->snd_ssthresh; +} + +static enum tcp_bbr2_phase bbr2_get_phase(struct bbr *bbr) +{ + switch (bbr->mode) { + case BBR_STARTUP: + return BBR2_PHASE_STARTUP; + case BBR_DRAIN: + return BBR2_PHASE_DRAIN; + case BBR_PROBE_BW: + break; + case BBR_PROBE_RTT: + return BBR2_PHASE_PROBE_RTT; + default: + return BBR2_PHASE_INVALID; + } + switch (bbr->cycle_idx) { + case BBR_BW_PROBE_UP: + return BBR2_PHASE_PROBE_BW_UP; + case BBR_BW_PROBE_DOWN: + return BBR2_PHASE_PROBE_BW_DOWN; + case BBR_BW_PROBE_CRUISE: + return BBR2_PHASE_PROBE_BW_CRUISE; + case BBR_BW_PROBE_REFILL: + return BBR2_PHASE_PROBE_BW_REFILL; + default: + return BBR2_PHASE_INVALID; + } +} + +static size_t bbr2_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) +{ + if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || + ext & (1 << (INET_DIAG_VEGASINFO - 1))) { + struct bbr *bbr = inet_csk_ca(sk); + u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk)); + u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk)); + u64 bw_lo = bbr->bw_lo == ~0U ? + ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo); + + memset(&info->bbr2, 0, sizeof(info->bbr2)); + info->bbr2.bbr_bw_lsb = (u32)bw; + info->bbr2.bbr_bw_msb = (u32)(bw >> 32); + info->bbr2.bbr_min_rtt = bbr->min_rtt_us; + info->bbr2.bbr_pacing_gain = bbr->pacing_gain; + info->bbr2.bbr_cwnd_gain = bbr->cwnd_gain; + info->bbr2.bbr_bw_hi_lsb = (u32)bw_hi; + info->bbr2.bbr_bw_hi_msb = (u32)(bw_hi >> 32); + info->bbr2.bbr_bw_lo_lsb = (u32)bw_lo; + info->bbr2.bbr_bw_lo_msb = (u32)(bw_lo >> 32); + info->bbr2.bbr_mode = bbr->mode; + info->bbr2.bbr_phase = (__u8)bbr2_get_phase(bbr); + info->bbr2.bbr_version = (__u8)2; + info->bbr2.bbr_inflight_lo = bbr->inflight_lo; + info->bbr2.bbr_inflight_hi = bbr->inflight_hi; + info->bbr2.bbr_extra_acked = bbr_extra_acked(sk); + *attr = INET_DIAG_BBRINFO; + return sizeof(info->bbr2); + } + return 0; +} + +static void bbr2_set_state(struct sock *sk, u8 new_state) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (new_state == TCP_CA_Loss) { + struct rate_sample rs = { .losses = 1 }; + struct bbr_context ctx = { 0 }; + + bbr->prev_ca_state = TCP_CA_Loss; + bbr->full_bw = 0; + if (!bbr2_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) { + /* bbr_adapt_lower_bounds() needs cwnd before + * we suffered an RTO, to update inflight_lo: + */ + bbr->inflight_lo = + max(tp->snd_cwnd, bbr->prior_cwnd); + } + bbr_debug(sk, 0, &rs, &ctx); + } else if (bbr->prev_ca_state == TCP_CA_Loss && + new_state != TCP_CA_Loss) { + tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + bbr->try_fast_path = 0; /* bound cwnd using latest model */ + } +} + +static struct tcp_congestion_ops tcp_bbr2_cong_ops __read_mostly = { + .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS, + .name = "bbr2", + .owner = THIS_MODULE, + .init = bbr2_init, + .cong_control = bbr2_main, + .sndbuf_expand = bbr_sndbuf_expand, + .skb_marked_lost = bbr2_skb_marked_lost, + .undo_cwnd = bbr2_undo_cwnd, + .cwnd_event = bbr_cwnd_event, + .ssthresh = bbr2_ssthresh, + .tso_segs = bbr_tso_segs, + .get_info = bbr2_get_info, + .set_state = bbr2_set_state, +}; + +static int __init bbr_register(void) +{ + BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); + return tcp_register_congestion_control(&tcp_bbr2_cong_ops); +} + +static void __exit bbr_unregister(void) +{ + tcp_unregister_congestion_control(&tcp_bbr2_cong_ops); +} + 
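Once the registration boilerplate below is built and the module loaded, userspace opts individual sockets into the algorithm with the standard TCP_CONGESTION socket option; non-root processes can only select algorithms listed in net.ipv4.tcp_allowed_congestion_control. A minimal usage sketch (not part of the patch):

/* Usage sketch: select "bbr2" for one socket via TCP_CONGESTION.
 * Not part of the patch. Build: cc -o cc_select cc_select.c
 */
#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	char name[16] = { 0 };
	socklen_t len = sizeof(name);
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* Fails with ENOENT if no "bbr2" algorithm is registered. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, "bbr2",
		       strlen("bbr2")) < 0)
		perror("setsockopt(TCP_CONGESTION)");
	if (getsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, name, &len) == 0)
		printf("congestion control: %s\n", name);
	close(fd);
	return 0;
}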
+module_init(bbr_register); +module_exit(bbr_unregister); + +MODULE_AUTHOR("Van Jacobson "); +MODULE_AUTHOR("Neal Cardwell "); +MODULE_AUTHOR("Yuchung Cheng "); +MODULE_AUTHOR("Soheil Hassas Yeganeh "); +MODULE_AUTHOR("Priyaranjan Jha "); +MODULE_AUTHOR("Yousuk Seung "); +MODULE_AUTHOR("Kevin Yang "); +MODULE_AUTHOR("Arjun Roy "); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 9b9b02052fd36..1cdcb4df0eb7e 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -138,10 +138,9 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, struct sk_psock *psock = sk_psock_get(sk); int ret; - if (unlikely(!psock)) { - sk_msg_free(sk, msg); - return 0; - } + if (unlikely(!psock)) + return -EPIPE; + ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) : tcp_bpf_push_locked(sk, msg, bytes, flags, false); sk_psock_put(sk, psock); @@ -335,7 +334,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, cork = true; psock->cork = NULL; } - sk_msg_return(sk, msg, tosend); + sk_msg_return(sk, msg, msg->sg.size); release_sock(sk); ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); @@ -375,8 +374,11 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, } if (msg && msg->sg.data[msg->sg.start].page_link && - msg->sg.data[msg->sg.start].length) + msg->sg.data[msg->sg.start].length) { + if (eval == __SK_REDIRECT) + sk_mem_charge(sk, msg->sg.size); goto more_data; + } } return ret; } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index db5831e6c136a..153ed9010c0c2 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -179,6 +179,7 @@ void tcp_init_congestion_control(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); tcp_sk(sk)->prior_ssthresh = 0; + tcp_sk(sk)->fast_ack_mode = 0; if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bfe4112e000c0..4aaf5ca6ce9c8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -349,7 +349,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) tcp_enter_quickack_mode(sk, 2); break; case INET_ECN_CE: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { @@ -360,7 +360,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) tp->ecn_flags |= TCP_ECN_SEEN; break; default: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); tp->ecn_flags |= TCP_ECN_SEEN; break; @@ -1079,7 +1079,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) */ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) { + struct sock *sk = (struct sock *)tp; + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + tp->lost += tcp_skb_pcount(skb); + if (ca_ops->skb_marked_lost) + ca_ops->skb_marked_lost(sk, skb); } void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) @@ -1460,6 +1465,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); tcp_skb_pcount_add(skb, -pcount); + /* Adjust tx.in_flight as pcount is shifted from skb to prev. 
*/ + if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount, + "prev in_flight: %u skb in_flight: %u pcount: %u", + TCP_SKB_CB(prev)->tx.in_flight, + TCP_SKB_CB(skb)->tx.in_flight, + pcount)) + TCP_SKB_CB(skb)->tx.in_flight = 0; + else + TCP_SKB_CB(skb)->tx.in_flight -= pcount; + TCP_SKB_CB(prev)->tx.in_flight += pcount; + /* When we're adding to gso_segs == 1, gso_size will be zero, * in theory this shouldn't be necessary but as long as DSACK * code can come after this skb later on it's better to keep @@ -3790,6 +3806,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); + tcp_rate_check_app_limited(sk); /* ts_recent update must be made after we are sure that the packet * is in window. @@ -3887,6 +3904,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); + rs.is_ece = !!(flag & FLAG_ECE); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); @@ -5465,13 +5483,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* More than one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && + (tp->fast_ack_mode == 1 || /* ... and right edge of window advances far enough. * (tcp_recvmsg() will send ACK otherwise). * If application uses SO_RCVLOWAT, we want send ack now if * we have not received enough bytes to satisfy the condition. */ - (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || - __tcp_select_window(sk) >= tp->rcv_wnd)) || + (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || + __tcp_select_window(sk) >= tp->rcv_wnd))) || /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5079832af5c10..0bfecd72de05a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -377,7 +377,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, th->cwr = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; } - } else if (!tcp_ca_needs_ecn(sk)) { + } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) && + !tcp_ca_needs_ecn(sk)) { /* ACK or retransmitted segment: clear ECT|CE */ INET_ECN_dontxmit(sk); } @@ -1533,7 +1534,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; - int nsize, old_factor; + int nsize, old_factor, inflight_prev; long limit; int nlen; u8 flags; @@ -1610,6 +1611,15 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, if (diff) tcp_adjust_pcount(sk, skb, diff); + + /* Set buff tx.in_flight as if buff were sent by itself. */ + inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor; + if (WARN_ONCE(inflight_prev < 0, + "inconsistent: tx.in_flight: %u old_factor: %d", + TCP_SKB_CB(skb)->tx.in_flight, old_factor)) + inflight_prev = 0; + TCP_SKB_CB(buff)->tx.in_flight = inflight_prev + + tcp_skb_pcount(buff); } /* Link BUFF into the send queue. 
*/ @@ -1978,13 +1988,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - u32 min_tso, tso_segs; - - min_tso = ca_ops->min_tso_segs ? - ca_ops->min_tso_segs(sk) : - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; + u32 tso_segs; - tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); + tso_segs = ca_ops->tso_segs ? + ca_ops->tso_segs(sk, mss_now) : + tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); return min_t(u32, tso_segs, sk->sk_gso_max_segs); } @@ -2619,6 +2628,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache; list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); + tcp_set_tx_in_flight(sk, skb); goto repair; /* Skip network transmission */ } @@ -3719,6 +3729,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) */ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; int space, err = 0; @@ -3733,8 +3744,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) * private TCP options. The cost is reduced data space in SYN :( */ tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp); + /* Sync mss_cache after updating the mss_clamp */ + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - + space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; space = min_t(size_t, space, fo->size); diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index fbab921670cc9..796fa6e5310ce 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -34,6 +34,24 @@ * ready to send in the write queue. */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 in_flight; + + /* Check, sanitize, and record packets in flight after skb was sent. */ + in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb); + if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX, + "insane in_flight %u cc %s mss %u " + "cwnd %u pif %u %u %u %u\n", + in_flight, inet_csk(sk)->icsk_ca_ops->name, + tp->mss_cache, tp->snd_cwnd, + tp->packets_out, tp->retrans_out, + tp->sacked_out, tp->lost_out)) + in_flight = TCPCB_IN_FLIGHT_MAX; + TCP_SKB_CB(skb)->tx.in_flight = in_flight; +} + /* Snapshot the current delivery information in the skb, to generate * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). */ @@ -66,7 +84,9 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; + TCP_SKB_CB(skb)->tx.lost = tp->lost; TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 
1 : 0; + tcp_set_tx_in_flight(sk, skb); } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) @@ -87,17 +107,20 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, if (!rs->prior_delivered || after(scb->tx.delivered, rs->prior_delivered)) { + rs->prior_lost = scb->tx.lost; rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; + rs->tx_in_flight = scb->tx.in_flight; /* Record send time of most recently ACKed packet: */ tp->first_tx_mstamp = tcp_skb_timestamp_us(skb); /* Find the duration of the "send phase" of this window: */ - rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, - scb->tx.first_tx_mstamp); + rs->interval_us = tcp_stamp32_us_delta( + tp->first_tx_mstamp, + scb->tx.first_tx_mstamp); } /* Mark off the skb delivered once it's sacked to avoid being @@ -139,6 +162,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, return; } rs->delivered = tp->delivered - rs->prior_delivered; + rs->lost = tp->lost - rs->prior_lost; rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; /* delivered_ce occupies less than 32 bits in the skb control block */ @@ -150,7 +174,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * longer phase. */ snd_us = rs->interval_us; /* send phase */ - ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, + ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp, rs->prior_mstamp); /* ack phase */ rs->interval_us = max(snd_us, ack_us); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 20cf4a98c69d8..b5f7e49a003a2 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -607,6 +607,7 @@ void tcp_write_timer_handler(struct sock *sk) goto out; } + tcp_rate_check_app_limited(sk); tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 26c00ebf4fbae..7f555d2e5357f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -275,6 +275,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); + struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; @@ -286,16 +287,15 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) goto out; rc = -ENODEV; if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); - if (llc->dev && addr->sllc_arphrd != llc->dev->type) { - dev_put(llc->dev); - llc->dev = NULL; + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + if (dev && addr->sllc_arphrd != dev->type) { + dev_put(dev); + dev = NULL; } } else - llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); - if (!llc->dev) + dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); + if (!dev) goto out; - netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL); rc = -EUSERS; llc->laddr.lsap = llc_ui_autoport(); if (!llc->laddr.lsap) @@ -304,6 +304,12 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) sap = llc_sap_open(llc->laddr.lsap, NULL); if (!sap) goto out; + + /* Note: We do not expect errors from this point. 
*/ + llc->dev = dev; + netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL); + dev = NULL; + memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); /* assign new connection to its SAP */ @@ -311,6 +317,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; out: + dev_put(dev); return rc; } @@ -333,6 +340,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); + struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; @@ -348,25 +356,27 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); - if (llc->dev) { + dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); + if (dev) { if (is_zero_ether_addr(addr->sllc_mac)) - memcpy(addr->sllc_mac, llc->dev->dev_addr, + memcpy(addr->sllc_mac, dev->dev_addr, IFHWADDRLEN); - if (addr->sllc_arphrd != llc->dev->type || + if (addr->sllc_arphrd != dev->type || !ether_addr_equal(addr->sllc_mac, - llc->dev->dev_addr)) { + dev->dev_addr)) { rc = -EINVAL; - llc->dev = NULL; + dev = NULL; } } - } else - llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, + } else { + dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); - dev_hold_track(llc->dev, &llc->dev_tracker, GFP_ATOMIC); + } + dev_hold(dev); rcu_read_unlock(); - if (!llc->dev) + if (!dev) goto out; + if (!addr->sllc_sap) { rc = -EUSERS; addr->sllc_sap = llc_ui_autoport(); @@ -398,6 +408,12 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) goto out_put; } } + + /* Note: We do not expect errors from this point. 
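Editor's note: both llc_ui_autobind() and llc_ui_bind() above now resolve the device into a local `dev`, validate it, and publish it to `llc->dev` only once no further failure is possible; every exit path then drops whatever reference is still held in the local. A compact userspace model of this commit-on-success shape, with hypothetical obj_get()/obj_put() helpers standing in for the dev_get_*()/dev_put() refcounting::

  #include <stdio.h>
  #include <stdlib.h>

  struct obj { int refs; };

  static struct obj *obj_get(void)   /* stands in for dev_get_by_index() */
  {
          struct obj *o = calloc(1, sizeof(*o));

          if (o)
                  o->refs = 1;
          return o;
  }

  static void obj_put(struct obj *o) /* stands in for dev_put(); NULL-safe */
  {
          if (o && --o->refs == 0)
                  free(o);
  }

  static struct obj *bound;          /* stands in for llc->dev */

  static int bind_thing(int fail_midway)
  {
          struct obj *o = obj_get();
          int rc = -1;

          if (!o)
                  goto out;
          if (fail_midway)           /* any validation error before commit */
                  goto out;

          /* No errors expected from this point: transfer the reference. */
          bound = o;
          o = NULL;
          rc = 0;
  out:
          obj_put(o);                /* drops the ref only on failure */
          return rc;
  }

  int main(void)
  {
          int rc;

          rc = bind_thing(1);
          printf("fail path: rc=%d bound=%p\n", rc, (void *)bound);
          rc = bind_thing(0);
          printf("ok path:   rc=%d bound=%p\n", rc, (void *)bound);
          obj_put(bound);
          return 0;
  }

Nulling the local at the commit point is what lets a single obj_put() on the shared exit path cover every failure without ever double-dropping on success.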
*/ + llc->dev = dev; + netdev_tracker_alloc(llc->dev, &llc->dev_tracker, GFP_KERNEL); + dev = NULL; + llc->laddr.lsap = addr->sllc_sap; memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); @@ -408,6 +424,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) out_put: llc_sap_put(sap); out: + dev_put(dev); release_sock(sk); return rc; } diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 26d2f8ba70297..03fb8070bdae6 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -453,9 +453,17 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw, return 0; } - if (WARN_ON_ONCE((status->encoding != RX_ENC_HE && streams > 4) || - (status->encoding == RX_ENC_HE && streams > 8))) + if (unlikely(status->encoding != RX_ENC_HE && streams > 4)) { + pr_warn_once("%s: status->encoding != RX_ENC_HE (%u != %d) && streams > 4 (%d > 4)\n", + __func__, status->encoding, RX_ENC_HE, streams); return 0; + } + + if (unlikely(status->encoding == RX_ENC_HE && streams > 8)) { + pr_warn_once("%s: status->encoding == RX_ENC_HE (%u == %d) && streams > 8 (%d > 8)\n", + __func__, status->encoding, RX_ENC_HE, streams); + return 0; + } duration = airtime_mcs_groups[group].duration[idx]; duration <<= airtime_mcs_groups[group].shift; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 87a208089caf7..58ff57dc669c4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2148,14 +2148,12 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, const struct mesh_setup *setup) { u8 *new_ie; - const u8 *old_ie; struct ieee80211_sub_if_data *sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); int i; /* allocate information elements */ new_ie = NULL; - old_ie = ifmsh->ie; if (setup->ie_len) { new_ie = kmemdup(setup->ie, setup->ie_len, @@ -2165,7 +2163,6 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, } ifmsh->ie_len = setup->ie_len; ifmsh->ie = new_ie; - kfree(old_ie); /* now copy the rest of the setup parameters */ ifmsh->mesh_id_len = setup->mesh_id_len; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e87bccaab561f..95aaf00c876c3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2380,7 +2380,7 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, const struct cfg80211_chan_def *chandef); u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype); -u8 *ieee80211_ie_build_he_cap(u8 *pos, +u8 *ieee80211_ie_build_he_cap(u32 disable_flags, u8 *pos, const struct ieee80211_sta_he_cap *he_cap, u8 *end); void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 15ac08d111ea1..6847fdf934392 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -580,7 +580,7 @@ int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, return -ENOMEM; pos = skb_put(skb, ie_len); - ieee80211_ie_build_he_cap(pos, he_cap, pos + ie_len); + ieee80211_ie_build_he_cap(0, pos, he_cap, pos + ie_len); return 0; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 744842c4513b1..c4d3e2da73f23 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -636,7 +636,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_supported_band *sband) { - u8 *pos; + u8 *pos, *pre_he_pos; const 
struct ieee80211_sta_he_cap *he_cap = NULL; struct ieee80211_chanctx_conf *chanctx_conf; u8 he_cap_size; @@ -653,16 +653,21 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, he_cap = ieee80211_get_he_iftype_cap(sband, ieee80211_vif_type_p2p(&sdata->vif)); - if (!he_cap || !reg_cap) + if (!he_cap || !chanctx_conf || !reg_cap) return; + /* get a max size estimate */ he_cap_size = 2 + 1 + sizeof(he_cap->he_cap_elem) + ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) + ieee80211_he_ppe_size(he_cap->ppe_thres[0], he_cap->he_cap_elem.phy_cap_info); pos = skb_put(skb, he_cap_size); - ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size); + pre_he_pos = pos; + pos = ieee80211_ie_build_he_cap(sdata->u.mgd.flags, + pos, he_cap, pos + he_cap_size); + /* trim excess if any */ + skb_trim(skb, skb->len - (pre_he_pos + he_cap_size - pos)); ieee80211_ie_build_he_6ghz_cap(sdata, skb); } diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 8c6416129d5be..810ea5cd66814 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -664,7 +664,9 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif, * needs to be fixed. */ if (rates[i].flags & IEEE80211_TX_RC_MCS) { - WARN_ON(rates[i].idx > 76); + if (unlikely(rates[i].idx > 76)) + pr_warn("%s: rates[%d].idx == %d > 76\n", + __func__, i, rates[i].idx); if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && info->control.use_cts_prot) @@ -674,7 +676,9 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif, } if (rates[i].flags & IEEE80211_TX_RC_VHT_MCS) { - WARN_ON(ieee80211_rate_get_vht_mcs(&rates[i]) > 9); + if (unlikely(ieee80211_rate_get_vht_mcs(&rates[i]) > 9)) + pr_warn("%s: ieee80211_rate_get_vht_mcs(&rates[%d]) == %d > 9\n", + __func__, i, ieee80211_rate_get_vht_mcs(&rates[i])); continue; } @@ -685,7 +689,9 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif, } /* RC is busted */ - if (WARN_ON_ONCE(rates[i].idx >= sband->n_bitrates)) { + if (unlikely(rates[i].idx >= sband->n_bitrates)) { + pr_warn_once("%s: rates[%d].idx == %d >= sband->n_bitrates == %d\n", + __func__, i, rates[i].idx, sband->n_bitrates); rates[i].idx = -1; continue; } diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 9c3b7fc377c17..a65d55bbb46b1 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -438,8 +438,13 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, if (minstrel_ht_is_legacy_group(group)) overhead = mi->overhead_legacy; - else + else { ampdu_len = minstrel_ht_avg_ampdu_len(mi); + if (unlikely(!ampdu_len)) { + pr_err_once("%s: ampdu_len == 0!\n", __func__); + ampdu_len = 1; + } + } nsecs = 1000 * overhead / ampdu_len; nsecs += minstrel_mcs_groups[group].duration[rate] << diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f71b042a5c8bb..342c2bfe27091 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1974,7 +1974,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, if (he_cap && cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), IEEE80211_CHAN_NO_HE)) { - pos = ieee80211_ie_build_he_cap(pos, he_cap, end); + pos = ieee80211_ie_build_he_cap(0, pos, he_cap, end); if (!pos) goto out_err; } @@ -2918,10 +2918,11 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) he_cap->he_cap_elem.phy_cap_info); } -u8 *ieee80211_ie_build_he_cap(u8 *pos, +u8 *ieee80211_ie_build_he_cap(u32 disable_flags, u8 *pos, const struct ieee80211_sta_he_cap 
*he_cap, u8 *end) { + struct ieee80211_he_cap_elem elem; u8 n; u8 ie_len; u8 *orig_pos = pos; @@ -2934,7 +2935,23 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos, if (!he_cap) return orig_pos; - n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem); + /* modify on stack first to calculate 'n' and 'ie_len' correctly */ + elem = he_cap->he_cap_elem; + + if (disable_flags & IEEE80211_STA_DISABLE_40MHZ) + elem.phy_cap_info[0] &= + ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G | + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G); + + if (disable_flags & IEEE80211_STA_DISABLE_160MHZ) + elem.phy_cap_info[0] &= + ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; + + if (disable_flags & IEEE80211_STA_DISABLE_80P80MHZ) + elem.phy_cap_info[0] &= + ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G; + + n = ieee80211_he_mcs_nss_size(&elem); ie_len = 2 + 1 + sizeof(he_cap->he_cap_elem) + n + ieee80211_he_ppe_size(he_cap->ppe_thres[0], @@ -2948,8 +2965,8 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos, *pos++ = WLAN_EID_EXT_HE_CAPABILITY; /* Fixed data */ - memcpy(pos, &he_cap->he_cap_elem, sizeof(he_cap->he_cap_elem)); - pos += sizeof(he_cap->he_cap_elem); + memcpy(pos, &elem, sizeof(elem)); + pos += sizeof(elem); memcpy(pos, &he_cap->he_mcs_nss_supp, n); pos += n; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1c72f25f083ea..014c9d88f9479 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1196,6 +1196,7 @@ static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, g tcp_skb_entail(ssk, skb); return skb; } + tcp_skb_tsorted_anchor_cleanup(skb); kfree_skb(skb); return NULL; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index ae4488a13c70c..ceb38a7b37cb7 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -556,6 +556,12 @@ static const struct nf_ct_ext_type helper_extend = { .id = NF_CT_EXT_HELPER, }; +void nf_ct_set_auto_assign_helper_warned(struct net *net) +{ + nf_ct_pernet(net)->auto_assign_helper_warned = true; +} +EXPORT_SYMBOL_GPL(nf_ct_set_auto_assign_helper_warned); + void nf_conntrack_helper_pernet_init(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index d1582b888c0d8..8ec55cd72572e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -341,8 +341,8 @@ static void tcp_options(const struct sk_buff *skb, if (!ptr) return; - state->td_scale = - state->flags = 0; + state->td_scale = 0; + state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL; while (length > 0) { int opcode=*ptr++; @@ -862,6 +862,16 @@ static bool tcp_can_early_drop(const struct nf_conn *ct) return false; } +static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state) +{ + state->td_end = 0; + state->td_maxend = 0; + state->td_maxwin = 0; + state->td_maxack = 0; + state->td_scale = 0; + state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL; +} + /* Returns verdict for packet, or -1 for invalid. 
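Editor's note: nf_ct_tcp_state_reset() above replaces an open-coded memset() precisely so that IP_CT_TCP_FLAG_BE_LIBERAL survives a state reset while the window-tracking fields are cleared. A sketch of the masking idea; the struct layout and the 0x08 flag value mirror the uapi definitions but are assumptions here::

  #include <stdint.h>
  #include <stdio.h>

  #define FLAG_BE_LIBERAL 0x08  /* the one flag that must survive a reset */

  struct tcp_state {
          uint32_t td_end, td_maxend, td_maxwin, td_maxack;
          uint8_t td_scale;
          uint8_t flags;
  };

  static void state_reset(struct tcp_state *s)
  {
          s->td_end = 0;
          s->td_maxend = 0;
          s->td_maxwin = 0;
          s->td_maxack = 0;
          s->td_scale = 0;
          s->flags &= FLAG_BE_LIBERAL;  /* keep liberal mode, drop the rest */
  }

  int main(void)
  {
          struct tcp_state s = { .td_end = 42, .flags = 0x0F };

          state_reset(&s);
          /* Prints td_end=0 flags=0x8: tracking cleared, mode kept. */
          printf("td_end=%u flags=%#x\n", (unsigned)s.td_end, s.flags);
          return 0;
  }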
*/ int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -968,8 +978,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = ct->proto.tcp.last_flags; - memset(&ct->proto.tcp.seen[dir], 0, - sizeof(struct ip_ct_tcp_state)); + nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]); break; } ct->proto.tcp.last_index = index; diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 5c57ade6bd05a..0ccabf3fa6aa3 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -6,12 +6,29 @@ #include #include #include +#include static unsigned int nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + struct vlan_ethhdr *veth; + __be16 proto; + switch (skb->protocol) { + case htons(ETH_P_8021Q): + veth = (struct vlan_ethhdr *)skb_mac_header(skb); + proto = veth->h_vlan_encapsulated_proto; + break; + case htons(ETH_P_PPP_SES): + proto = nf_flow_pppoe_proto(skb); + break; + default: + proto = skb->protocol; + break; + } + + switch (proto) { case htons(ETH_P_IP): return nf_flow_offload_ip_hook(priv, skb, state); case htons(ETH_P_IPV6): diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 889cf88d3dba6..6257d87c3a56d 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -8,8 +8,6 @@ #include #include #include -#include -#include #include #include #include @@ -239,22 +237,6 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, return NF_STOLEN; } -static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) -{ - __be16 proto; - - proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + - sizeof(struct pppoe_hdr))); - switch (proto) { - case htons(PPP_IP): - return htons(ETH_P_IP); - case htons(PPP_IPV6): - return htons(ETH_P_IPV6); - } - - return 0; -} - static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto, u32 *offset) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d71a33ae39b35..1f5a0eece0d14 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9275,17 +9275,23 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) } EXPORT_SYMBOL_GPL(nft_parse_u32_check); -static unsigned int nft_parse_register(const struct nlattr *attr) +static unsigned int nft_parse_register(const struct nlattr *attr, u32 *preg) { unsigned int reg; reg = ntohl(nla_get_be32(attr)); switch (reg) { case NFT_REG_VERDICT...NFT_REG_4: - return reg * NFT_REG_SIZE / NFT_REG32_SIZE; + *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE; + break; + case NFT_REG32_00...NFT_REG32_15: + *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + break; default: - return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + return -ERANGE; } + + return 0; } /** @@ -9327,7 +9333,10 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) u32 reg; int err; - reg = nft_parse_register(attr); + err = nft_parse_register(attr, ®); + if (err < 0) + return err; + err = nft_validate_register_load(reg, len); if (err < 0) return err; @@ -9382,7 +9391,10 @@ int nft_parse_register_store(const struct nft_ctx *ctx, int err; u32 reg; - reg = nft_parse_register(attr); + err = nft_parse_register(attr, ®); + if (err < 0) + return err; + err = nft_validate_register_store(ctx, reg, data, type, len); if (err < 0) return err; diff --git 
a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 36e73f9828c50..8af98239655db 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -201,7 +201,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) const struct nft_rule_dp *rule, *last_rule; const struct net *net = nft_net(pkt); const struct nft_expr *expr, *last; - struct nft_regs regs; + struct nft_regs regs = {}; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; bool genbit = READ_ONCE(net->nft.gencursor); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 5adf8bb628a80..9c7472af9e4a1 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1041,6 +1041,9 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, if (err < 0) goto err_put_helper; + /* Avoid the bogus warning, helper will be assigned after CT init */ + nf_ct_set_auto_assign_helper_warned(ctx->net); + return 0; err_put_helper: diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7b344035bfe3f..47a876ccd2881 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -159,6 +159,8 @@ EXPORT_SYMBOL(do_trace_netlink_extack); static inline u32 netlink_group_mask(u32 group) { + if (group > 32) + return 0; return group ? 1 << (group - 1) : 0; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c07afff57dd32..4a947c13c813a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -734,6 +734,57 @@ static bool skb_nfct_cached(struct net *net, } #if IS_ENABLED(CONFIG_NF_NAT) +static void ovs_nat_update_key(struct sw_flow_key *key, + const struct sk_buff *skb, + enum nf_nat_manip_type maniptype) +{ + if (maniptype == NF_NAT_MANIP_SRC) { + __be16 src; + + key->ct_state |= OVS_CS_F_SRC_NAT; + if (key->eth.type == htons(ETH_P_IP)) + key->ipv4.addr.src = ip_hdr(skb)->saddr; + else if (key->eth.type == htons(ETH_P_IPV6)) + memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, + sizeof(key->ipv6.addr.src)); + else + return; + + if (key->ip.proto == IPPROTO_UDP) + src = udp_hdr(skb)->source; + else if (key->ip.proto == IPPROTO_TCP) + src = tcp_hdr(skb)->source; + else if (key->ip.proto == IPPROTO_SCTP) + src = sctp_hdr(skb)->source; + else + return; + + key->tp.src = src; + } else { + __be16 dst; + + key->ct_state |= OVS_CS_F_DST_NAT; + if (key->eth.type == htons(ETH_P_IP)) + key->ipv4.addr.dst = ip_hdr(skb)->daddr; + else if (key->eth.type == htons(ETH_P_IPV6)) + memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, + sizeof(key->ipv6.addr.dst)); + else + return; + + if (key->ip.proto == IPPROTO_UDP) + dst = udp_hdr(skb)->dest; + else if (key->ip.proto == IPPROTO_TCP) + dst = tcp_hdr(skb)->dest; + else if (key->ip.proto == IPPROTO_SCTP) + dst = sctp_hdr(skb)->dest; + else + return; + + key->tp.dst = dst; + } +} + /* Modelled after nf_nat_ipv[46]_fn(). * range is only used for new, uninitialized NAT state. * Returns either NF_ACCEPT or NF_DROP. 
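Editor's note: nft_parse_register() above now fails with -ERANGE for register numbers outside the two valid windows instead of translating them blindly (previously anything not in NFT_REG_VERDICT..NFT_REG_4 fell through to the 32-bit formula). A userspace model of the translation arithmetic, assuming the usual uapi numbering in which one 16-byte register aliases four 4-byte ones::

  #include <errno.h>
  #include <stdio.h>

  /* Register numbering as in the uapi header (values assumed here):
   * 0..4  are the 16-byte verdict/data registers,
   * 8..23 are the 4-byte NFT_REG32_00..NFT_REG32_15 aliases.
   */
  #define NFT_REG_VERDICT  0
  #define NFT_REG_4        4
  #define NFT_REG32_00     8
  #define NFT_REG32_15     23
  #define NFT_REG_SIZE     16
  #define NFT_REG32_SIZE   4

  static int parse_register(unsigned int reg, unsigned int *preg)
  {
          if (reg <= NFT_REG_4)
                  *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE;
          else if (reg >= NFT_REG32_00 && reg <= NFT_REG32_15)
                  *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
          else
                  return -ERANGE;  /* 5..7 and >23 used to be mistranslated */
          return 0;
  }

  int main(void)
  {
          unsigned int out;

          for (unsigned int reg = 0; reg < 26; reg++) {
                  if (parse_register(reg, &out) == 0)
                          printf("reg %2u -> %2u\n", reg, out);
                  else
                          printf("reg %2u -> ERANGE\n", reg);
          }
          return 0;
  }

With these values both windows land on one 32-bit index space: NFT_REG_1 and NFT_REG32_00 both map to index 4.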
@@ -741,7 +792,7 @@ static bool skb_nfct_cached(struct net *net, static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype) + enum nf_nat_manip_type maniptype, struct sw_flow_key *key) { int hooknum, nh_off, err = NF_ACCEPT; @@ -813,58 +864,11 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, push: skb_push_rcsum(skb, nh_off); - return err; -} - -static void ovs_nat_update_key(struct sw_flow_key *key, - const struct sk_buff *skb, - enum nf_nat_manip_type maniptype) -{ - if (maniptype == NF_NAT_MANIP_SRC) { - __be16 src; - - key->ct_state |= OVS_CS_F_SRC_NAT; - if (key->eth.type == htons(ETH_P_IP)) - key->ipv4.addr.src = ip_hdr(skb)->saddr; - else if (key->eth.type == htons(ETH_P_IPV6)) - memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, - sizeof(key->ipv6.addr.src)); - else - return; - - if (key->ip.proto == IPPROTO_UDP) - src = udp_hdr(skb)->source; - else if (key->ip.proto == IPPROTO_TCP) - src = tcp_hdr(skb)->source; - else if (key->ip.proto == IPPROTO_SCTP) - src = sctp_hdr(skb)->source; - else - return; - - key->tp.src = src; - } else { - __be16 dst; - - key->ct_state |= OVS_CS_F_DST_NAT; - if (key->eth.type == htons(ETH_P_IP)) - key->ipv4.addr.dst = ip_hdr(skb)->daddr; - else if (key->eth.type == htons(ETH_P_IPV6)) - memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, - sizeof(key->ipv6.addr.dst)); - else - return; - - if (key->ip.proto == IPPROTO_UDP) - dst = udp_hdr(skb)->dest; - else if (key->ip.proto == IPPROTO_TCP) - dst = tcp_hdr(skb)->dest; - else if (key->ip.proto == IPPROTO_SCTP) - dst = sctp_hdr(skb)->dest; - else - return; + /* Update the flow key if NAT successful. */ + if (err == NF_ACCEPT) + ovs_nat_update_key(key, skb, maniptype); - key->tp.dst = dst; - } + return err; } /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ @@ -906,7 +910,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } else { return NF_ACCEPT; /* Connection is not NATed. */ } - err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key); if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { if (ct->status & IPS_SRC_NAT) { @@ -916,17 +920,13 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, maniptype = NF_NAT_MANIP_SRC; err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, - maniptype); + maniptype, key); } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL, - NF_NAT_MANIP_SRC); + NF_NAT_MANIP_SRC, key); } } - /* Mark NAT done if successful and update the flow key. 
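Editor's note: relocating ovs_nat_update_key() into ovs_ct_nat_execute() above makes the flow key track each successful manip as it happens; with the old placement, a connection undergoing both SNAT and DNAT only had the final maniptype reflected in the key. A reduced model of the corrected control flow, with hypothetical do_manip() and key fields::

  #include <stdio.h>

  enum manip { MANIP_SRC, MANIP_DST };

  struct key    { unsigned int src, dst; };
  struct packet { unsigned int src, dst; };

  /* Hypothetical stand-in for the NAT engine mangling the packet. */
  static int do_manip(struct packet *pkt, enum manip m)
  {
          if (m == MANIP_SRC)
                  pkt->src = 100;  /* pretend SNAT picked 100 */
          else
                  pkt->dst = 200;  /* pretend DNAT picked 200 */
          return 0;                /* NF_ACCEPT */
  }

  /* Key update now lives next to the manip, as in ovs_ct_nat_execute(). */
  static int nat_execute(struct packet *pkt, struct key *key, enum manip m)
  {
          int err = do_manip(pkt, m);

          if (err == 0) {          /* update the flow key if NAT succeeded */
                  if (m == MANIP_SRC)
                          key->src = pkt->src;
                  else
                          key->dst = pkt->dst;
          }
          return err;
  }

  int main(void)
  {
          struct packet pkt = { .src = 1, .dst = 2 };
          struct key key    = { .src = 1, .dst = 2 };

          /* Double NAT: both calls update the key, not only the last. */
          nat_execute(&pkt, &key, MANIP_DST);
          nat_execute(&pkt, &key, MANIP_SRC);
          printf("key src=%u dst=%u\n", key.src, key.dst);
          return 0;
  }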
*/ - if (err == NF_ACCEPT) - ovs_nat_update_key(key, skb, maniptype); - return err; } #else /* !CONFIG_NF_NAT */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index fd1f809e9bc1b..0d677c9c2c805 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2201,8 +2201,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, icmpv6_key->icmpv6_type = ntohs(output->tp.src); icmpv6_key->icmpv6_code = ntohs(output->tp.dst); - if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || - icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { + if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || + swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { struct ovs_key_nd *nd_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 5b1927d66f0da..dac4fdc7488a3 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -78,6 +78,7 @@ struct rfkill_data { struct mutex mtx; wait_queue_head_t read_wait; bool input_handler; + u8 max_size; }; @@ -1153,6 +1154,8 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) if (!data) return -ENOMEM; + data->max_size = RFKILL_EVENT_SIZE_V1; + INIT_LIST_HEAD(&data->events); mutex_init(&data->mtx); init_waitqueue_head(&data->read_wait); @@ -1235,6 +1238,7 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf, list); sz = min_t(unsigned long, sizeof(ev->ev), count); + sz = min_t(unsigned long, sz, data->max_size); ret = sz; if (copy_to_user(buf, &ev->ev, sz)) ret = -EFAULT; @@ -1249,6 +1253,7 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf, static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { + struct rfkill_data *data = file->private_data; struct rfkill *rfkill; struct rfkill_event_ext ev; int ret; @@ -1263,6 +1268,7 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, * our API version even in a write() call, if it cares. 
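Editor's note: the rfkill changes above add per-descriptor size negotiation. A new RFKILL_IOC_MAX_SIZE ioctl records the largest event structure userspace understands (defaulting to the v1 layout), and read()/write() then clamp transfers to it. A userspace model of the validation and clamping; the 8- and 9-byte sizes are assumptions standing in for RFKILL_EVENT_SIZE_V1 and the extended event::

  #include <stdint.h>
  #include <stdio.h>

  #define EVENT_SIZE_V1   8   /* assumed size of the original ABI struct */
  #define EVENT_SIZE_CUR  9   /* assumed size of the extended struct */

  static uint8_t max_size = EVENT_SIZE_V1;  /* per-fd default: old ABI */

  static int set_max_size(uint32_t size)
  {
          /* Reject sizes the old ABI could never have produced, and
           * sizes that do not fit the u8 field, as the ioctl does. */
          if (size < EVENT_SIZE_V1 || size > UINT8_MAX)
                  return -1;  /* -EINVAL */
          max_size = (uint8_t)size;
          return 0;
  }

  static size_t clamp_transfer(size_t count)
  {
          size_t sz = count < EVENT_SIZE_CUR ? count : EVENT_SIZE_CUR;

          return sz < max_size ? sz : max_size;  /* second min_t() in read() */
  }

  int main(void)
  {
          printf("default read of 64 bytes -> %zu\n", clamp_transfer(64));
          set_max_size(EVENT_SIZE_CUR);
          printf("after ioctl, read of 64  -> %zu\n", clamp_transfer(64));
          printf("bad ioctl(4) rc=%d, bad ioctl(300) rc=%d\n",
                 set_max_size(4), set_max_size(300));
          return 0;
  }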
*/ count = min(count, sizeof(ev)); + count = min_t(size_t, count, data->max_size); if (copy_from_user(&ev, buf, count)) return -EFAULT; @@ -1322,31 +1328,47 @@ static int rfkill_fop_release(struct inode *inode, struct file *file) return 0; } -#ifdef CONFIG_RFKILL_INPUT static long rfkill_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct rfkill_data *data = file->private_data; + int ret = -ENOSYS; + u32 size; if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC) return -ENOSYS; - if (_IOC_NR(cmd) != RFKILL_IOC_NOINPUT) - return -ENOSYS; - mutex_lock(&data->mtx); - - if (!data->input_handler) { - if (atomic_inc_return(&rfkill_input_disabled) == 1) - printk(KERN_DEBUG "rfkill: input handler disabled\n"); - data->input_handler = true; + switch (_IOC_NR(cmd)) { +#ifdef CONFIG_RFKILL_INPUT + case RFKILL_IOC_NOINPUT: + if (!data->input_handler) { + if (atomic_inc_return(&rfkill_input_disabled) == 1) + printk(KERN_DEBUG "rfkill: input handler disabled\n"); + data->input_handler = true; + } + ret = 0; + break; +#endif + case RFKILL_IOC_MAX_SIZE: + if (get_user(size, (__u32 __user *)arg)) { + ret = -EFAULT; + break; + } + if (size < RFKILL_EVENT_SIZE_V1 || size > U8_MAX) { + ret = -EINVAL; + break; + } + data->max_size = size; + ret = 0; + break; + default: + break; } - mutex_unlock(&data->mtx); - return 0; + return ret; } -#endif static const struct file_operations rfkill_fops = { .owner = THIS_MODULE, @@ -1355,10 +1377,8 @@ static const struct file_operations rfkill_fops = { .write = rfkill_fop_write, .poll = rfkill_fop_poll, .release = rfkill_fop_release, -#ifdef CONFIG_RFKILL_INPUT .unlocked_ioctl = rfkill_fop_ioctl, .compat_ioctl = compat_ptr_ioctl, -#endif .llseek = no_llseek, }; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7bd6f8a66a3ef..969e532f77a90 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -777,14 +777,12 @@ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); -static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call, - unsigned long expire_at, - unsigned long now, - enum rxrpc_timer_trace why) -{ - trace_rxrpc_timer(call, why, now); - timer_reduce(&call->timer, expire_at); -} +void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, + enum rxrpc_timer_trace why); + +void rxrpc_delete_call_timer(struct rxrpc_call *call); /* * call_object.c @@ -808,6 +806,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *); bool __rxrpc_queue_call(struct rxrpc_call *); bool rxrpc_queue_call(struct rxrpc_call *); void rxrpc_see_call(struct rxrpc_call *); +bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_cleanup_call(struct rxrpc_call *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index df864e6922679..22e05de5d1ca9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -310,7 +310,7 @@ void rxrpc_process_call(struct work_struct *work) } if (call->state == RXRPC_CALL_COMPLETE) { - del_timer_sync(&call->timer); + rxrpc_delete_call_timer(call); goto out_put; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 4eb91d958a48d..043508fd8d8a5 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -53,10 +53,30 @@ static void rxrpc_call_timer_expired(struct timer_list *t) if 
(call->state < RXRPC_CALL_COMPLETE) { trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); - rxrpc_queue_call(call); + __rxrpc_queue_call(call); + } else { + rxrpc_put_call(call, rxrpc_call_put); + } +} + +void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, + enum rxrpc_timer_trace why) +{ + if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) { + trace_rxrpc_timer(call, why, now); + if (timer_reduce(&call->timer, expire_at)) + rxrpc_put_call(call, rxrpc_call_put_notimer); } } +void rxrpc_delete_call_timer(struct rxrpc_call *call) +{ + if (del_timer_sync(&call->timer)) + rxrpc_put_call(call, rxrpc_call_put_timer); +} + static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; /* @@ -463,6 +483,17 @@ void rxrpc_see_call(struct rxrpc_call *call) } } +bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +{ + const void *here = __builtin_return_address(0); + int n = atomic_fetch_add_unless(&call->usage, 1, 0); + + if (n == 0) + return false; + trace_rxrpc_call(call->debug_id, op, n, here, NULL); + return true; +} + /* * Note the addition of a ref on a call. */ @@ -510,8 +541,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) spin_unlock_bh(&call->lock); rxrpc_put_call_slot(call); - - del_timer_sync(&call->timer); + rxrpc_delete_call_timer(call); /* Make sure we don't get any more notifications */ write_lock_bh(&rx->recvmsg_lock); @@ -618,6 +648,8 @@ static void rxrpc_destroy_call(struct work_struct *work) struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); struct rxrpc_net *rxnet = call->rxnet; + rxrpc_delete_call_timer(call); + rxrpc_put_connection(call->conn); rxrpc_put_peer(call->peer); kfree(call->rxtx_buffer); @@ -652,8 +684,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); - del_timer_sync(&call->timer); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c index ead3471307ee5..ee269e0e6ee87 100644 --- a/net/rxrpc/server_key.c +++ b/net/rxrpc/server_key.c @@ -84,6 +84,9 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep) prep->payload.data[1] = (struct rxrpc_security *)sec; + if (!sec->preparse_server_key) + return -EINVAL; + return sec->preparse_server_key(prep); } @@ -91,7 +94,7 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) { const struct rxrpc_security *sec = prep->payload.data[1]; - if (sec) + if (sec && sec->free_preparse_server_key) sec->free_preparse_server_key(prep); } @@ -99,7 +102,7 @@ static void rxrpc_destroy_s(struct key *key) { const struct rxrpc_security *sec = key->payload.data[1]; - if (sec) + if (sec && sec->destroy_server_key) sec->destroy_server_key(key); } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index ec19f625863a0..25718acc0ff00 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -605,22 +605,25 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, if (!ct) return false; if (!net_eq(net, read_pnet(&ct->ct_net))) - return false; + goto drop_ct; if (nf_ct_zone(ct)->id != zone_id) - return false; + goto drop_ct; /* Force conntrack entry direction. 
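Editor's note: rxrpc_try_get_call() above takes a reference only when the usage count is not already zero, which is what lets the timer path pin the call without racing a final put. A C11 model of the atomic_fetch_add_unless(v, 1, 0) idiom it relies on; names are illustrative::

  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdio.h>

  /* Increment *v by 1 unless it currently holds 0; returns the old
   * value. These are the semantics of atomic_fetch_add_unless(v, 1, 0).
   */
  static int fetch_add_unless_zero(atomic_int *v)
  {
          int old = atomic_load(v);

          do {
                  if (old == 0)
                          return 0;  /* object already dead: don't revive */
          } while (!atomic_compare_exchange_weak(v, &old, old + 1));

          return old;
  }

  static atomic_int usage = 1;

  static bool try_get(void)
  {
          return fetch_add_unless_zero(&usage) != 0;
  }

  int main(void)
  {
          printf("try_get on live object: %d (usage=%d)\n",
                 try_get(), atomic_load(&usage));
          atomic_store(&usage, 0);  /* pretend the last ref was dropped */
          printf("try_get on dead object: %d (usage=%d)\n",
                 try_get(), atomic_load(&usage));
          return 0;
  }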
*/ if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { if (nf_ct_is_confirmed(ct)) nf_ct_kill(ct); - nf_ct_put(ct); - nf_ct_set(skb, NULL, IP_CT_UNTRACKED); - - return false; + goto drop_ct; } return true; + +drop_ct: + nf_ct_put(ct); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); + + return false; } /* Trim the skb to the length specified by the IP/IPv6 header, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index cc544a97c4afd..7f342bc127358 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -930,6 +930,11 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Set peer label for connection. */ + if (security_sctp_assoc_established((struct sctp_association *)asoc, + chunk->skb)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Verify that the chunk length for the COOKIE-ACK is OK. * If we don't do this, any bundled chunks may be junked. */ @@ -945,9 +950,6 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL()); - /* Set peer label for connection. */ - security_inet_conn_established(ep->base.sk, chunk->skb); - /* RFC 2960 5.1 Normal Establishment of an Association * * E) Upon reception of the COOKIE ACK, endpoint "A" will move diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c83fe618767c4..b36d235d2d6d9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1065,7 +1065,9 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt) static void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) { - if (task->tk_xprt) + if (task->tk_xprt && + !(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) && + (task->tk_flags & RPC_TASK_MOVEABLE))) return; if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) task->tk_xprt = rpc_task_get_first_xprt(clnt); @@ -1085,8 +1087,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_TIMEOUT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - if (atomic_read(&clnt->cl_swapper)) - task->tk_flags |= RPC_TASK_SWAPPER; /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index e2c835482791e..ae295844ac55a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -876,6 +876,15 @@ void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) ops->rpc_release(calldata); } +static bool xprt_needs_memalloc(struct rpc_xprt *xprt, struct rpc_task *tk) +{ + if (!xprt) + return false; + if (!atomic_read(&xprt->swapper)) + return false; + return test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == tk; +} + /* * This is the RPC `scheduler' (or rather, the finite state machine). 
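Editor's note: xprt_needs_memalloc() above, together with the __rpc_execute() hunk that follows, sets PF_MEMALLOC around swap-backed RPC work and then puts the caller's flags back with current_restore_flags(). The restore must reinstate only the bits the function touched, as in this sketch (the flag value here is made up)::

  #include <stdio.h>

  #define PF_MEMALLOC 0x0800  /* illustrative value */

  static unsigned int current_flags = 0x0042;  /* pretend task flags */

  /* Restore @flags in current_flags, as current_restore_flags() does:
   * put back the saved state of exactly those bits, leave others alone.
   */
  static void restore_flags(unsigned int orig, unsigned int flags)
  {
          current_flags &= ~flags;
          current_flags |= orig & flags;
  }

  static void do_swap_io(void)
  {
          unsigned int pflags = current_flags;  /* save on entry */

          current_flags |= PF_MEMALLOC;         /* allow emergency reserves */
          /* ... transmit pages under memory pressure ... */
          restore_flags(pflags, PF_MEMALLOC);   /* undo only our change */
  }

  int main(void)
  {
          do_swap_io();
          printf("flags after: %#x\n", current_flags);  /* back to 0x42 */
          return 0;
  }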
*/ @@ -884,6 +893,7 @@ static void __rpc_execute(struct rpc_task *task) struct rpc_wait_queue *queue; int task_is_async = RPC_IS_ASYNC(task); int status = 0; + unsigned long pflags = current->flags; WARN_ON_ONCE(RPC_IS_QUEUED(task)); if (RPC_IS_QUEUED(task)) @@ -906,6 +916,10 @@ static void __rpc_execute(struct rpc_task *task) } if (!do_action) break; + if (RPC_IS_SWAPPER(task) || + xprt_needs_memalloc(task->tk_xprt, task)) + current->flags |= PF_MEMALLOC; + trace_rpc_task_run_action(task, do_action); do_action(task); @@ -943,7 +957,7 @@ static void __rpc_execute(struct rpc_task *task) rpc_clear_running(task); spin_unlock(&queue->lock); if (task_is_async) - return; + goto out; /* sync task: sleep here */ trace_rpc_task_sync_sleep(task, task->tk_action); @@ -967,6 +981,8 @@ static void __rpc_execute(struct rpc_task *task) /* Release all resources associated with the task */ rpc_release_task(task); +out: + current_restore_flags(pflags, PF_MEMALLOC); } /* @@ -1023,8 +1039,8 @@ int rpc_malloc(struct rpc_task *task) struct rpc_buffer *buf; gfp_t gfp = GFP_NOFS; - if (RPC_IS_SWAPPER(task)) - gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + if (RPC_IS_ASYNC(task)) + gfp = GFP_NOWAIT | __GFP_NOWARN; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 05c758da6a92a..9d8a7d9f3e412 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -97,7 +97,7 @@ static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj, return 0; ret = sprintf(buf, "%s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, @@ -105,33 +105,31 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, char *buf) { struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); - struct sockaddr_storage saddr; - struct sock_xprt *sock; - ssize_t ret = -1; + size_t buflen = PAGE_SIZE; + ssize_t ret = -ENOTSOCK; if (!xprt || !xprt_connected(xprt)) { - xprt_put(xprt); - return -ENOTCONN; + ret = -ENOTCONN; + } else if (xprt->ops->get_srcaddr) { + ret = xprt->ops->get_srcaddr(xprt, buf, buflen); + if (ret > 0) { + if (ret < buflen - 1) { + buf[ret] = '\n'; + ret++; + buf[ret] = '\0'; + } + } } - - sock = container_of(xprt, struct sock_xprt, xprt); - mutex_lock(&sock->recv_mutex); - if (sock->sock == NULL || - kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) - goto out; - - ret = sprintf(buf, "%pISc\n", &saddr); -out: - mutex_unlock(&sock->recv_mutex); xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) + struct kobj_attribute *attr, char *buf) { struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + unsigned short srcport = 0; + size_t buflen = PAGE_SIZE; ssize_t ret; if (!xprt || !xprt_connected(xprt)) { @@ -139,7 +137,11 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, return -ENOTCONN; } - ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" + if (xprt->ops->get_srcport) + srcport = xprt->ops->get_srcport(xprt); + + ret = snprintf(buf, buflen, + "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n" "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n" @@ -147,14 +149,11 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs, 
xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen, xprt->sending.qlen, xprt->pending.qlen, - xprt->backlog.qlen, xprt->main, - (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? - get_srcport(xprt) : 0, + xprt->backlog.qlen, xprt->main, srcport, atomic_long_read(&xprt->queuelen), - (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? - xprt->address_strings[RPC_DISPLAY_PORT] : "0"); + xprt->address_strings[RPC_DISPLAY_PORT]); xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, @@ -201,7 +200,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, } xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, @@ -220,7 +219,7 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, xprt_switch->xps_nunique_destaddr_xprts, atomic_long_read(&xprt_switch->xps_queuelen)); xprt_switch_put(xprt_switch); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a02de2bddb28b..396a74974f60f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1503,6 +1503,9 @@ bool xprt_prepare_transmit(struct rpc_task *task) return false; } + if (atomic_read(&xprt->swapper)) + /* This will be clear in __rpc_execute */ + current->flags |= PF_MEMALLOC; return true; } @@ -2112,7 +2115,14 @@ static void xprt_destroy(struct rpc_xprt *xprt) */ wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); + /* + * xprt_schedule_autodisconnect() can run after XPRT_LOCKED + * is cleared. We use ->transport_lock to ensure the mod_timer() + * can only run *before* del_time_sync(), never after. + */ + spin_lock(&xprt->transport_lock); del_timer_sync(&xprt->timer); + spin_unlock(&xprt->transport_lock); /* * Destroy sockets etc from the system workqueue so they can diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 42e375dbdadb4..ff78a296fa810 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -235,8 +235,11 @@ xprt_rdma_connect_worker(struct work_struct *work) struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, rx_connect_worker.work); struct rpc_xprt *xprt = &r_xprt->rx_xprt; + unsigned int pflags = current->flags; int rc; + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; rc = rpcrdma_xprt_connect(r_xprt); xprt_clear_connecting(xprt); if (!rc) { @@ -250,6 +253,7 @@ xprt_rdma_connect_worker(struct work_struct *work) rpcrdma_xprt_disconnect(r_xprt); xprt_unlock_connect(xprt, r_xprt); xprt_wake_pending_tasks(xprt, rc); + current_restore_flags(pflags, PF_MEMALLOC); } /** @@ -570,8 +574,8 @@ xprt_rdma_allocate(struct rpc_task *task) gfp_t flags; flags = RPCRDMA_DEF_GFP; - if (RPC_IS_SWAPPER(task)) - flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + if (RPC_IS_ASYNC(task)) + flags = GFP_NOWAIT | __GFP_NOWARN; if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, flags)) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0f39e08ee580e..11eab0f0333b0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1638,7 +1638,7 @@ static int xs_get_srcport(struct sock_xprt *transport) return port; } -unsigned short get_srcport(struct rpc_xprt *xprt) +static unsigned short xs_sock_srcport(struct rpc_xprt *xprt) { struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); unsigned short ret = 0; @@ -1648,7 +1648,25 @@ unsigned short 
get_srcport(struct rpc_xprt *xprt) mutex_unlock(&sock->recv_mutex); return ret; } -EXPORT_SYMBOL(get_srcport); + +static int xs_sock_srcaddr(struct rpc_xprt *xprt, char *buf, size_t buflen) +{ + struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); + union { + struct sockaddr sa; + struct sockaddr_storage st; + } saddr; + int ret = -ENOTCONN; + + mutex_lock(&sock->recv_mutex); + if (sock->sock) { + ret = kernel_getsockname(sock->sock, &saddr.sa); + if (ret >= 0) + ret = snprintf(buf, buflen, "%pISc", &saddr.sa); + } + mutex_unlock(&sock->recv_mutex); + return ret; +} static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) { @@ -2052,7 +2070,10 @@ static void xs_udp_setup_socket(struct work_struct *work) struct rpc_xprt *xprt = &transport->xprt; struct socket *sock; int status = -EIO; + unsigned int pflags = current->flags; + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP, false); @@ -2072,6 +2093,7 @@ static void xs_udp_setup_socket(struct work_struct *work) xprt_clear_connecting(xprt); xprt_unlock_connect(xprt, transport); xprt_wake_pending_tasks(xprt, status); + current_restore_flags(pflags, PF_MEMALLOC); } /** @@ -2231,11 +2253,19 @@ static void xs_tcp_setup_socket(struct work_struct *work) struct socket *sock = transport->sock; struct rpc_xprt *xprt = &transport->xprt; int status; + unsigned int pflags = current->flags; + + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; - if (!sock) { - sock = xs_create_sock(xprt, transport, - xs_addr(xprt)->sa_family, SOCK_STREAM, - IPPROTO_TCP, true); + if (xprt_connected(xprt)) + goto out; + if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT, + &transport->sock_state) || + !sock) { + xs_reset_transport(transport); + sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, + SOCK_STREAM, IPPROTO_TCP, true); if (IS_ERR(sock)) { xprt_wake_pending_tasks(xprt, PTR_ERR(sock)); goto out; @@ -2259,6 +2289,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) fallthrough; case -EINPROGRESS: /* SYN_SENT! 
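Editor's note: xs_sock_srcaddr() above queries the transport socket for its bound source address and formats it straight into the caller's buffer, returning the snprintf() length. A runnable userspace equivalent built on getsockname(); the kernel version uses kernel_getsockname() and the %pISc printk format instead of inet_ntop()::

  #include <arpa/inet.h>
  #include <netinet/in.h>
  #include <stdio.h>
  #include <sys/socket.h>
  #include <unistd.h>

  static int sock_srcaddr(int fd, char *buf, size_t buflen)
  {
          union {
                  struct sockaddr sa;
                  struct sockaddr_storage st;
          } saddr;
          socklen_t len = sizeof(saddr);
          char addr[INET6_ADDRSTRLEN];
          const void *src;

          if (getsockname(fd, &saddr.sa, &len) < 0)
                  return -1;

          if (saddr.sa.sa_family == AF_INET)
                  src = &((struct sockaddr_in *)&saddr.sa)->sin_addr;
          else
                  src = &((struct sockaddr_in6 *)&saddr.sa)->sin6_addr;

          if (!inet_ntop(saddr.sa.sa_family, src, addr, sizeof(addr)))
                  return -1;

          return snprintf(buf, buflen, "%s", addr);
  }

  int main(void)
  {
          struct sockaddr_in sin = { .sin_family = AF_INET };
          char buf[64];
          int fd = socket(AF_INET, SOCK_DGRAM, 0);

          if (fd < 0)
                  return 1;
          sin.sin_port = htons(9);
          sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
          /* connect() on UDP sends nothing but binds a source address */
          connect(fd, (struct sockaddr *)&sin, sizeof(sin));
          if (sock_srcaddr(fd, buf, sizeof(buf)) > 0)
                  printf("src addr: %s\n", buf);
          close(fd);
          return 0;
  }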
*/ + set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state); if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; fallthrough; @@ -2296,6 +2327,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) xprt_clear_connecting(xprt); out_unlock: xprt_unlock_connect(xprt, transport); + current_restore_flags(pflags, PF_MEMALLOC); } /** @@ -2319,13 +2351,9 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); - if (transport->sock != NULL && !xprt_connecting(xprt)) { + if (transport->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p for %lu " - "seconds\n", - xprt, xprt->reestablish_timeout / HZ); - - /* Start by resetting any existing state */ - xs_reset_transport(transport); + "seconds\n", xprt, xprt->reestablish_timeout / HZ); delay = xprt_reconnect_delay(xprt); xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); @@ -2621,6 +2649,8 @@ static const struct rpc_xprt_ops xs_udp_ops = { .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, + .get_srcaddr = xs_sock_srcaddr, + .get_srcport = xs_sock_srcport, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_udp_send_request, @@ -2643,6 +2673,8 @@ static const struct rpc_xprt_ops xs_tcp_ops = { .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, + .get_srcaddr = xs_sock_srcaddr, + .get_srcport = xs_sock_srcport, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .prepare_request = xs_stream_prepare_request, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7545321c3440b..17f8c523e33b0 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2852,7 +2852,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) /* Try again later if dest link is congested */ if (tsk->cong_link_cnt) { - sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100)); + sk_reset_timer(sk, &sk->sk_timer, + jiffies + msecs_to_jiffies(100)); return; } /* Prepare SYN for retransmit */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c19569819866e..1e7ed5829ed51 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2084,7 +2084,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other if (ousk->oob_skb) consume_skb(ousk->oob_skb); - ousk->oob_skb = skb; + WRITE_ONCE(ousk->oob_skb, skb); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); @@ -2602,9 +2602,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) oob_skb = u->oob_skb; - if (!(state->flags & MSG_PEEK)) { - u->oob_skb = NULL; - } + if (!(state->flags & MSG_PEEK)) + WRITE_ONCE(u->oob_skb, NULL); unix_state_unlock(sk); @@ -2639,7 +2638,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, skb = NULL; } else if (sock_flag(sk, SOCK_URGINLINE)) { if (!(flags & MSG_PEEK)) { - u->oob_skb = NULL; + WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); } } else if (!(flags & MSG_PEEK)) { @@ -3094,11 +3093,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCATMARK: { struct sk_buff *skb; - struct unix_sock *u = unix_sk(sk); int answ = 0; skb = skb_peek(&sk->sk_receive_queue); - if (skb && skb == u->oob_skb) + if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) answ = 1; err = put_user(answ, (int __user *)arg); } @@ -3139,6 +3137,10 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa mask |= EPOLLIN | EPOLLRDNORM; if 
(sk_is_readable(sk)) mask |= EPOLLIN | EPOLLRDNORM; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (READ_ONCE(unix_sk(sk)->oob_skb)) + mask |= EPOLLPRI; +#endif /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 5afc194a58bbd..ba1c8cc0c4671 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -622,6 +622,13 @@ static int virtio_vsock_probe(struct virtio_device *vdev) INIT_WORK(&vsock->event_work, virtio_transport_event_work); INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); + if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET)) + vsock->seqpacket_allow = true; + + vdev->priv = vsock; + + virtio_device_ready(vdev); + mutex_lock(&vsock->tx_lock); vsock->tx_run = true; mutex_unlock(&vsock->tx_lock); @@ -636,10 +643,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->event_run = true; mutex_unlock(&vsock->event_lock); - if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET)) - vsock->seqpacket_allow = true; - - vdev->priv = vsock; rcu_assign_pointer(the_virtio_vsock, vsock); mutex_unlock(&the_virtio_vsock_mutex); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 3583354a7d7fe..3a171828638b1 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1765,10 +1765,15 @@ void x25_kill_by_neigh(struct x25_neigh *nb) write_lock_bh(&x25_list_lock); - sk_for_each(s, &x25_list) - if (x25_sk(s)->neighbour == nb) + sk_for_each(s, &x25_list) { + if (x25_sk(s)->neighbour == nb) { + write_unlock_bh(&x25_list_lock); + lock_sock(s); x25_disconnect(s, ENETUNREACH, 0, 0); - + release_sock(s); + write_lock_bh(&x25_list_lock); + } + } write_unlock_bh(&x25_list_lock); /* Remove any related forwards */ diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 28ef3f4465ae9..ac343cd8ff3f6 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -403,18 +403,8 @@ EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch); static int xsk_wakeup(struct xdp_sock *xs, u8 flags) { struct net_device *dev = xs->dev; - int err; - - rcu_read_lock(); - err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); - rcu_read_unlock(); - - return err; -} -static int xsk_zc_xmit(struct xdp_sock *xs) -{ - return xsk_wakeup(xs, XDP_WAKEUP_TX); + return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); } static void xsk_destruct_skb(struct sk_buff *skb) @@ -533,6 +523,12 @@ static int xsk_generic_xmit(struct sock *sk) mutex_lock(&xs->mutex); + /* Since we dropped the RCU read lock, the socket state might have changed. */ + if (unlikely(!xsk_is_bound(xs))) { + err = -ENXIO; + goto out; + } + if (xs->queue_id >= xs->dev->real_num_tx_queues) goto out; @@ -596,16 +592,26 @@ static int xsk_generic_xmit(struct sock *sk) return err; } -static int __xsk_sendmsg(struct sock *sk) +static int xsk_xmit(struct sock *sk) { struct xdp_sock *xs = xdp_sk(sk); + int ret; if (unlikely(!(xs->dev->flags & IFF_UP))) return -ENETDOWN; if (unlikely(!xs->tx)) return -ENOBUFS; - return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk); + if (xs->zc) + return xsk_wakeup(xs, XDP_WAKEUP_TX); + + /* Drop the RCU lock since the SKB path might sleep. */ + rcu_read_unlock(); + ret = xsk_generic_xmit(sk); + /* Reacquire RCU lock before going into common code.
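Editor's note: the af_unix hunks above wrap every lockless access to oob_skb in READ_ONCE()/WRITE_ONCE() so that unix_poll() and SIOCATMARK can inspect it without taking the state lock. A C11 model of the same marked-access discipline using relaxed atomics; the kernel macros differ in spelling, not in intent::

  #include <stdatomic.h>
  #include <stdio.h>

  struct skb { int len; };

  /* Written under a lock, but also read locklessly from poll(): every
   * access must be marked so the compiler can neither tear nor cache
   * it. _Atomic with relaxed ordering models READ_ONCE/WRITE_ONCE.
   */
  static struct skb *_Atomic oob_skb;

  static void queue_oob(struct skb *skb)  /* writer side (holds the lock) */
  {
          atomic_store_explicit(&oob_skb, skb, memory_order_relaxed);
  }

  static int poll_has_oob(void)           /* lockless reader, as unix_poll() */
  {
          return atomic_load_explicit(&oob_skb, memory_order_relaxed) != NULL;
  }

  int main(void)
  {
          static struct skb one = { .len = 1 };

          printf("before:   EPOLLPRI=%d\n", poll_has_oob());
          queue_oob(&one);
          printf("queued:   EPOLLPRI=%d\n", poll_has_oob());
          queue_oob(NULL);  /* consumed: the recv path clears it */
          printf("consumed: EPOLLPRI=%d\n", poll_has_oob());
          return 0;
  }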
*/ + rcu_read_lock(); + + return ret; } static bool xsk_no_wakeup(struct sock *sk) @@ -619,7 +625,7 @@ static bool xsk_no_wakeup(struct sock *sk) #endif } -static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) +static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { bool need_wait = !(m->msg_flags & MSG_DONTWAIT); struct sock *sk = sock->sk; @@ -639,11 +645,22 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) pool = xs->pool; if (pool->cached_need_wakeup & XDP_WAKEUP_TX) - return __xsk_sendmsg(sk); + return xsk_xmit(sk); return 0; } -static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) +static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) +{ + int ret; + + rcu_read_lock(); + ret = __xsk_sendmsg(sock, m, total_len); + rcu_read_unlock(); + + return ret; +} + +static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) { bool need_wait = !(flags & MSG_DONTWAIT); struct sock *sk = sock->sk; @@ -669,6 +686,17 @@ static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int fl return 0; } +static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) +{ + int ret; + + rcu_read_lock(); + ret = __xsk_recvmsg(sock, m, len, flags); + rcu_read_unlock(); + + return ret; +} + static __poll_t xsk_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { @@ -679,8 +707,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sock, wait); - if (unlikely(!xsk_is_bound(xs))) + rcu_read_lock(); + if (unlikely(!xsk_is_bound(xs))) { + rcu_read_unlock(); return mask; + } pool = xs->pool; @@ -689,7 +720,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, xsk_wakeup(xs, pool->cached_need_wakeup); else /* Poll needs to drive Tx also in copy mode */ - __xsk_sendmsg(sk); + xsk_xmit(sk); } if (xs->rx && !xskq_prod_is_empty(xs->rx)) @@ -697,6 +728,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, if (xs->tx && xsk_tx_writeable(xs)) mask |= EPOLLOUT | EPOLLWRNORM; + rcu_read_unlock(); return mask; } @@ -728,7 +760,6 @@ static void xsk_unbind_dev(struct xdp_sock *xs) /* Wait for driver to stop using the xdp socket. 
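Editor's note: the xsk changes above hoist rcu_read_lock()/unlock() into thin sendmsg/recvmsg/poll wrappers and make xsk_xmit() drop the lock around the sleeping generic-xmit path, which is why xsk_generic_xmit() now revalidates the bound state on entry. A rough pthreads analogy of that lock choreography (RCU has no exact userspace twin, so a read lock stands in)::

  #include <pthread.h>
  #include <stdio.h>

  static pthread_rwlock_t guard = PTHREAD_RWLOCK_INITIALIZER;

  static int sendmsg_locked(int may_sleep)
  {
          if (may_sleep) {
                  /* The copy path may sleep: drop the read lock around
                   * it and reacquire before returning to common code. */
                  pthread_rwlock_unlock(&guard);
                  /* ... sleeping work, e.g. skb allocation ... */
                  pthread_rwlock_rdlock(&guard);
                  /* State may have changed while unlocked; the real
                   * code revalidates (xsk_is_bound()) in the callee. */
          }
          return 0;
  }

  static int do_sendmsg(int may_sleep)  /* thin wrapper, as xsk_sendmsg() */
  {
          int ret;

          pthread_rwlock_rdlock(&guard);
          ret = sendmsg_locked(may_sleep);
          pthread_rwlock_unlock(&guard);
          return ret;
  }

  int main(void)
  {
          printf("zero-copy path rc=%d\n", do_sendmsg(0));
          printf("generic path   rc=%d\n", do_sendmsg(1));
          return 0;
  }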
*/ xp_del_xsk(xs->pool, xs); - xs->dev = NULL; synchronize_net(); dev_put(dev); } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index fd39bb660ebcd..0202a90b65e3a 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -584,9 +584,13 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) u32 nb_entries1 = 0, nb_entries2; if (unlikely(pool->dma_need_sync)) { + struct xdp_buff *buff; + /* Slow path */ - *xdp = xp_alloc(pool); - return !!*xdp; + buff = xp_alloc(pool); + if (buff) + *xdp = buff; + return !!buff; } if (unlikely(pool->free_list_cnt)) { diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index aa50864e4415a..9f3446af50ce2 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1984,15 +1984,15 @@ int main(int argc, char **argv) setlocale(LC_ALL, ""); + prev_time = get_nsecs(); + start_time = prev_time; + if (!opt_quiet) { ret = pthread_create(&pt, NULL, poller, NULL); if (ret) exit_with_error(ret); } - prev_time = get_nsecs(); - start_time = prev_time; - /* Configure sched priority for better wake-up accuracy */ memset(&schparam, 0, sizeof(schparam)); schparam.sched_priority = opt_schprio; diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c index 7a15910d21718..8859fc1935428 100644 --- a/samples/landlock/sandboxer.c +++ b/samples/landlock/sandboxer.c @@ -134,6 +134,7 @@ static int populate_ruleset( ret = 0; out_free_name: + free(path_list); free(env_path_name); return ret; } diff --git a/scripts/atomic/fallbacks/read_acquire b/scripts/atomic/fallbacks/read_acquire index 803ba75610766..a0ea1d26e6b2e 100755 --- a/scripts/atomic/fallbacks/read_acquire +++ b/scripts/atomic/fallbacks/read_acquire @@ -2,6 +2,15 @@ cat <counter); + ${int} ret; + + if (__native_word(${atomic}_t)) { + ret = smp_load_acquire(&(v)->counter); + } else { + ret = arch_${atomic}_read(v); + __atomic_acquire_fence(); + } + + return ret; } EOF diff --git a/scripts/atomic/fallbacks/set_release b/scripts/atomic/fallbacks/set_release index 86ede759f24ea..05cdb7f42477a 100755 --- a/scripts/atomic/fallbacks/set_release +++ b/scripts/atomic/fallbacks/set_release @@ -2,6 +2,11 @@ cat <counter, i); + if (__native_word(${atomic}_t)) { + smp_store_release(&(v)->counter, i); + } else { + __atomic_release_fence(); + arch_${atomic}_set(v, i); + } } EOF diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile index 95aaf7431bffa..1cba78e1dce68 100644 --- a/scripts/dtc/Makefile +++ b/scripts/dtc/Makefile @@ -29,7 +29,7 @@ dtc-objs += yamltree.o # To include installed in a non-default path HOSTCFLAGS_yamltree.o := $(shell pkg-config --cflags yaml-0.1) # To link libyaml installed in a non-default path -HOSTLDLIBS_dtc := $(shell pkg-config yaml-0.1 --libs) +HOSTLDLIBS_dtc := $(shell pkg-config --libs yaml-0.1) endif # Generated files need one more search path to include headers in source tree diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index e9db7dcb3e5f4..b04aa8e91a41f 100644 --- a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -429,6 +429,23 @@ static unsigned int stackleak_cleanup_execute(void) return 0; } +/* + * STRING_CST may or may not be NUL terminated: + * https://gcc.gnu.org/onlinedocs/gccint/Constant-expressions.html + */ +static inline bool string_equal(tree node, const char *string, int length) +{ + if (TREE_STRING_LENGTH(node) < length) + return false; + if (TREE_STRING_LENGTH(node) > length + 1) + return false; + 
if (TREE_STRING_LENGTH(node) == length + 1 && + TREE_STRING_POINTER(node)[length] != '\0') + return false; + return !memcmp(TREE_STRING_POINTER(node), string, length); +} +#define STRING_EQUAL(node, str) string_equal(node, str, strlen(str)) + static bool stackleak_gate(void) { tree section; @@ -438,13 +455,13 @@ static bool stackleak_gate(void) if (section && TREE_VALUE(section)) { section = TREE_VALUE(TREE_VALUE(section)); - if (!strncmp(TREE_STRING_POINTER(section), ".init.text", 10)) + if (STRING_EQUAL(section, ".init.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".devinit.text", 13)) + if (STRING_EQUAL(section, ".devinit.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".cpuinit.text", 13)) + if (STRING_EQUAL(section, ".cpuinit.text")) return false; - if (!strncmp(TREE_STRING_POINTER(section), ".meminit.text", 13)) + if (STRING_EQUAL(section, ".meminit.text")) return false; } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6bfa332179140..e04ae56931e2e 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -669,7 +669,7 @@ static void handle_modversion(const struct module *mod, unsigned int crc; if (sym->st_shndx == SHN_UNDEF) { - warn("EXPORT symbol \"%s\" [%s%s] version ...\n" + warn("EXPORT symbol \"%s\" [%s%s] version generation failed, symbol will not be versioned.\n" "Is \"%s\" prototyped in ?\n", symname, mod->name, mod->is_vmlinux ? "" : ".ko", symname); diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 08f907382c618..7d87772f0ce68 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -86,7 +86,7 @@ static int __init evm_set_fixmode(char *str) else pr_err("invalid \"%s\" mode", str); - return 0; + return 1; } __setup("evm=", evm_set_fixmode); diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c index 5de0d599a2748..97bc27bbf0797 100644 --- a/security/keys/keyctl_pkey.c +++ b/security/keys/keyctl_pkey.c @@ -135,15 +135,23 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par switch (op) { case KEYCTL_PKEY_ENCRYPT: + if (uparams.in_len > info.max_dec_size || + uparams.out_len > info.max_enc_size) + return -EINVAL; + break; case KEYCTL_PKEY_DECRYPT: if (uparams.in_len > info.max_enc_size || uparams.out_len > info.max_dec_size) return -EINVAL; break; case KEYCTL_PKEY_SIGN: + if (uparams.in_len > info.max_data_size || + uparams.out_len > info.max_sig_size) + return -EINVAL; + break; case KEYCTL_PKEY_VERIFY: - if (uparams.in_len > info.max_sig_size || - uparams.out_len > info.max_data_size) + if (uparams.in_len > info.max_data_size || + uparams.in2_len > info.max_sig_size) return -EINVAL; break; default: @@ -151,7 +159,7 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par } params->in_len = uparams.in_len; - params->out_len = uparams.out_len; + params->out_len = uparams.out_len; /* Note: same as in2_len */ return 0; } diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c index d5c891d8d3534..9b9d3ef79cbe3 100644 --- a/security/keys/trusted-keys/trusted_core.c +++ b/security/keys/trusted-keys/trusted_core.c @@ -27,10 +27,10 @@ module_param_named(source, trusted_key_source, charp, 0); MODULE_PARM_DESC(source, "Select trusted keys source (tpm or tee)"); static const struct trusted_key_source trusted_key_sources[] = { -#if defined(CONFIG_TCG_TPM) +#if IS_REACHABLE(CONFIG_TCG_TPM) { "tpm", &trusted_key_tpm_ops }, #endif -#if 
diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c index d5c891d8d3534..9b9d3ef79cbe3 100644 --- a/security/keys/trusted-keys/trusted_core.c +++ b/security/keys/trusted-keys/trusted_core.c @@ -27,10 +27,10 @@ module_param_named(source, trusted_key_source, charp, 0); MODULE_PARM_DESC(source, "Select trusted keys source (tpm or tee)"); static const struct trusted_key_source trusted_key_sources[] = { -#if defined(CONFIG_TCG_TPM) +#if IS_REACHABLE(CONFIG_TCG_TPM) { "tpm", &trusted_key_tpm_ops }, #endif -#if defined(CONFIG_TEE) +#if IS_REACHABLE(CONFIG_TEE) { "tee", &trusted_key_tee_ops }, #endif }; @@ -351,7 +351,7 @@ static int __init init_trusted(void) static void __exit cleanup_trusted(void) { - static_call(trusted_key_exit)(); + static_call_cond(trusted_key_exit)(); } late_initcall(init_trusted); diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 32396962f04d6..7e27ce394020d 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -192,7 +192,7 @@ SYSCALL_DEFINE3(landlock_create_ruleset, return PTR_ERR(ruleset); /* Creates anonymous FD referring to the ruleset. */ - ruleset_fd = anon_inode_getfd("landlock-ruleset", &ruleset_fops, + ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops, ruleset, O_RDWR | O_CLOEXEC); if (ruleset_fd < 0) landlock_put_ruleset(ruleset); diff --git a/security/security.c b/security/security.c index 22261d79f3333..b7cf5cbfdc677 100644 --- a/security/security.c +++ b/security/security.c @@ -884,9 +884,22 @@ int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) return call_int_hook(fs_context_dup, 0, fc, src_fc); } -int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) +int security_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) { - return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param); + struct security_hook_list *hp; + int trc; + int rc = -ENOPARAM; + + hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param, + list) { + trc = hp->hook.fs_context_parse_param(fc, param); + if (trc == 0) + rc = 0; + else if (trc != -ENOPARAM) + return trc; + } + return rc; } int security_sb_alloc(struct super_block *sb) @@ -2391,6 +2404,13 @@ void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk, } EXPORT_SYMBOL(security_sctp_sk_clone); +int security_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + return call_int_hook(sctp_assoc_established, 0, asoc, skb); +} +EXPORT_SYMBOL(security_sctp_assoc_established); + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5b6895e4fc29e..ea725891e566e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -342,6 +342,10 @@ static void inode_free_security(struct inode *inode) struct selinux_mnt_opts { const char *fscontext, *context, *rootcontext, *defcontext; + u32 fscontext_sid; + u32 context_sid; + u32 rootcontext_sid; + u32 defcontext_sid; }; static void selinux_free_mnt_opts(void *mnt_opts) @@ -479,7 +483,7 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) static int sb_check_xattr_support(struct super_block *sb) { - struct superblock_security_struct *sbsec = sb->s_security; + struct superblock_security_struct *sbsec = selinux_superblock(sb); struct dentry *root = sb->s_root; struct inode *root_inode = d_backing_inode(root); u32 sid; @@ -598,15 +602,14 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag, return 0; } -static int parse_sid(struct super_block *sb, const char *s, u32 *sid, - gfp_t gfp) +static int parse_sid(struct super_block *sb, const char *s, u32 *sid) { int rc = security_context_str_to_sid(&selinux_state, s, - sid, gfp); + sid, GFP_KERNEL); if (rc) pr_warn("SELinux: security_context_str_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", - s, sb->s_id, sb->s_type->name, rc); + s, sb ? sb->s_id : "?", sb ?
sb->s_type->name : "?", rc); return rc; } @@ -673,8 +676,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, */ if (opts) { if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &fscontext_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->fscontext, &fscontext_sid); if (rc) goto out; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, @@ -683,8 +685,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= FSCONTEXT_MNT; } if (opts->context) { - rc = parse_sid(sb, opts->context, &context_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->context, &context_sid); if (rc) goto out; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, @@ -693,8 +694,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= CONTEXT_MNT; } if (opts->rootcontext) { - rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid); if (rc) goto out; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, @@ -703,8 +703,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= ROOTCONTEXT_MNT; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &defcontext_sid, - GFP_KERNEL); + rc = parse_sid(sb, opts->defcontext, &defcontext_sid); if (rc) goto out; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, @@ -996,21 +995,29 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts) if (opts->context || opts->defcontext) goto err; opts->context = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->context_sid); break; case Opt_fscontext: if (opts->fscontext) goto err; opts->fscontext = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->fscontext_sid); break; case Opt_rootcontext: if (opts->rootcontext) goto err; opts->rootcontext = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->rootcontext_sid); break; case Opt_defcontext: if (opts->context || opts->defcontext) goto err; opts->defcontext = s; + if (selinux_initialized(&selinux_state)) + parse_sid(NULL, s, &opts->defcontext_sid); break; } @@ -2647,9 +2654,7 @@ static int selinux_sb_eat_lsm_opts(char *options, void **mnt_opts) static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) { struct selinux_mnt_opts *opts = mnt_opts; - struct superblock_security_struct *sbsec = sb->s_security; - u32 sid; - int rc; + struct superblock_security_struct *sbsec = selinux_superblock(sb); /* * Superblock not initialized (i.e. no options) - reject if any @@ -2666,34 +2671,36 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) return (sbsec->flags & SE_MNTMASK) ? 
1 : 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid, GFP_NOWAIT); - if (rc) + if (opts->fscontext_sid == SECSID_NULL) return 1; - if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) + else if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, + opts->fscontext_sid)) return 1; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid, GFP_NOWAIT); - if (rc) + if (opts->context_sid == SECSID_NULL) return 1; - if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) + else if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, + opts->context_sid)) return 1; } if (opts->rootcontext) { - struct inode_security_struct *root_isec; - - root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid, GFP_NOWAIT); - if (rc) - return 1; - if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) + if (opts->rootcontext_sid == SECSID_NULL) return 1; + else { + struct inode_security_struct *root_isec; + + root_isec = backing_inode_security(sb->s_root); + if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, + opts->rootcontext_sid)) + return 1; + } } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid, GFP_NOWAIT); - if (rc) + if (opts->defcontext_sid == SECSID_NULL) return 1; - if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) + else if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, + opts->defcontext_sid)) return 1; } return 0; @@ -2713,14 +2720,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) return 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->fscontext, &sid); if (rc) return rc; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) goto out_bad_option; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->context, &sid); if (rc) return rc; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) @@ -2729,14 +2736,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) if (opts->rootcontext) { struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->rootcontext, &sid); if (rc) return rc; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) goto out_bad_option; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid, GFP_KERNEL); + rc = parse_sid(sb, opts->defcontext, &sid); if (rc) return rc; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) @@ -2860,10 +2867,9 @@ static int selinux_fs_context_parse_param(struct fs_context *fc, return opt; rc = selinux_add_opt(opt, param->string, &fc->security); - if (!rc) { + if (!rc) param->string = NULL; - rc = 1; - } + return rc; } @@ -3745,6 +3751,12 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, CAP_OPT_NONE, true); break; + case FIOCLEX: + case FIONCLEX: + if (!selinux_policycap_ioctl_skip_cloexec()) + error = ioctl_has_perm(cred, file, FILE__IOCTL, (u16) cmd); + break; + /* default case assumes that the command will go * to the file's ioctl() function. */ @@ -5299,37 +5311,38 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent) sksec->sclass = isec->sclass; } -/* Called whenever SCTP receives an INIT chunk. This happens when an incoming - * connect(2), sctp_connectx(3) or sctp_sendmsg(3) (with no association - * already present). 
+/* + * Determines peer_secid for the asoc and updates socket's peer label + * if it's the first association on the socket. */ -static int selinux_sctp_assoc_request(struct sctp_association *asoc, - struct sk_buff *skb) +static int selinux_sctp_process_new_assoc(struct sctp_association *asoc, + struct sk_buff *skb) { - struct sk_security_struct *sksec = asoc->base.sk->sk_security; + struct sock *sk = asoc->base.sk; + u16 family = sk->sk_family; + struct sk_security_struct *sksec = sk->sk_security; struct common_audit_data ad; struct lsm_network_audit net = {0,}; - u8 peerlbl_active; - u32 peer_sid = SECINITSID_UNLABELED; - u32 conn_sid; - int err = 0; + int err; - if (!selinux_policycap_extsockclass()) - return 0; + /* handle mapped IPv4 packets arriving via IPv6 sockets */ + if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) + family = PF_INET; - peerlbl_active = selinux_peerlbl_enabled(); + if (selinux_peerlbl_enabled()) { + asoc->peer_secid = SECSID_NULL; - if (peerlbl_active) { /* This will return peer_sid = SECSID_NULL if there are * no peer labels, see security_net_peersid_resolve(). */ - err = selinux_skb_peerlbl_sid(skb, asoc->base.sk->sk_family, - &peer_sid); + err = selinux_skb_peerlbl_sid(skb, family, &asoc->peer_secid); if (err) return err; - if (peer_sid == SECSID_NULL) - peer_sid = SECINITSID_UNLABELED; + if (asoc->peer_secid == SECSID_NULL) + asoc->peer_secid = SECINITSID_UNLABELED; + } else { + asoc->peer_secid = SECINITSID_UNLABELED; } if (sksec->sctp_assoc_state == SCTP_ASSOC_UNSET) { @@ -5340,8 +5353,8 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, * then it is approved by policy and used as the primary * peer SID for getpeercon(3). */ - sksec->peer_sid = peer_sid; - } else if (sksec->peer_sid != peer_sid) { + sksec->peer_sid = asoc->peer_secid; + } else if (sksec->peer_sid != asoc->peer_secid) { /* Other association peer SIDs are checked to enforce * consistency among the peer SIDs. */ @@ -5349,11 +5362,32 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, ad.u.net = &net; ad.u.net->sk = asoc->base.sk; err = avc_has_perm(&selinux_state, - sksec->peer_sid, peer_sid, sksec->sclass, - SCTP_SOCKET__ASSOCIATION, &ad); + sksec->peer_sid, asoc->peer_secid, + sksec->sclass, SCTP_SOCKET__ASSOCIATION, + &ad); if (err) return err; } + return 0; +} + +/* Called whenever SCTP receives an INIT or COOKIE ECHO chunk. This + * happens on an incoming connect(2), sctp_connectx(3) or + * sctp_sendmsg(3) (with no association already present). + */ +static int selinux_sctp_assoc_request(struct sctp_association *asoc, + struct sk_buff *skb) +{ + struct sk_security_struct *sksec = asoc->base.sk->sk_security; + u32 conn_sid; + int err; + + if (!selinux_policycap_extsockclass()) + return 0; + + err = selinux_sctp_process_new_assoc(asoc, skb); + if (err) + return err; /* Compute the MLS component for the connection and store * the information in asoc. This will be used by SCTP TCP type @@ -5361,17 +5395,36 @@ static int selinux_sctp_assoc_request(struct sctp_association *asoc, * socket to be generated. selinux_sctp_sk_clone() will then * plug this into the new socket. */ - err = selinux_conn_sid(sksec->sid, peer_sid, &conn_sid); + err = selinux_conn_sid(sksec->sid, asoc->peer_secid, &conn_sid); if (err) return err; asoc->secid = conn_sid; - asoc->peer_secid = peer_sid; /* Set any NetLabel labels including CIPSO/CALIPSO options. 
*/ return selinux_netlbl_sctp_assoc_request(asoc, skb); } +/* Called when SCTP receives a COOKIE ACK chunk as the final + * response to an association request (initiated by us). + */ +static int selinux_sctp_assoc_established(struct sctp_association *asoc, + struct sk_buff *skb) +{ + struct sk_security_struct *sksec = asoc->base.sk->sk_security; + + if (!selinux_policycap_extsockclass()) + return 0; + + /* Inherit secid from the parent socket - this will be picked up + * by selinux_sctp_sk_clone() if the association gets peeled off + * into a new socket. + */ + asoc->secid = sksec->sid; + + return selinux_sctp_process_new_assoc(asoc, skb); +} + /* Check if sctp IPv4/IPv6 addresses are valid for binding or connecting * based on their @optname. */ @@ -7192,6 +7245,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(sctp_assoc_request, selinux_sctp_assoc_request), LSM_HOOK_INIT(sctp_sk_clone, selinux_sctp_sk_clone), LSM_HOOK_INIT(sctp_bind_connect, selinux_sctp_bind_connect), + LSM_HOOK_INIT(sctp_assoc_established, selinux_sctp_assoc_established), LSM_HOOK_INIT(inet_conn_request, selinux_inet_conn_request), LSM_HOOK_INIT(inet_csk_clone, selinux_inet_csk_clone), LSM_HOOK_INIT(inet_conn_established, selinux_inet_conn_established), diff --git a/security/selinux/include/policycap.h b/security/selinux/include/policycap.h index 2ec038efbb03c..a9e572ca4fd96 100644 --- a/security/selinux/include/policycap.h +++ b/security/selinux/include/policycap.h @@ -11,6 +11,7 @@ enum { POLICYDB_CAPABILITY_CGROUPSECLABEL, POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION, POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS, + POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC, __POLICYDB_CAPABILITY_MAX }; #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1) diff --git a/security/selinux/include/policycap_names.h b/security/selinux/include/policycap_names.h index b89289f092c93..ebd64afe1defd 100644 --- a/security/selinux/include/policycap_names.h +++ b/security/selinux/include/policycap_names.h @@ -12,7 +12,8 @@ const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = { "always_check_network", "cgroup_seclabel", "nnp_nosuid_transition", - "genfs_seclabel_symlinks" + "genfs_seclabel_symlinks", + "ioctl_skip_cloexec" }; #endif /* _SELINUX_POLICYCAP_NAMES_H_ */ diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index ac0ece01305a6..c0d966020ebdd 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -219,6 +219,13 @@ static inline bool selinux_policycap_genfs_seclabel_symlinks(void) return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]); } +static inline bool selinux_policycap_ioctl_skip_cloexec(void) +{ + struct selinux_state *state = &selinux_state; + + return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_IOCTL_SKIP_CLOEXEC]); +} + struct selinux_policy_convert_data; struct selinux_load_state { diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index e4cd7cb856f37..f2f6203e0fff5 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -2127,6 +2127,8 @@ static int sel_fill_super(struct super_block *sb, struct fs_context *fc) } ret = sel_make_avc_files(dentry); + if (ret) + goto err; dentry = sel_make_dir(sb->s_root, "ss", &fsi->last_ino); if (IS_ERR(dentry)) {
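Taken together, the hunks above split the SELinux SCTP logic in two: selinux_sctp_assoc_request() keeps handling the server side (incoming INIT/COOKIE ECHO), while the new sctp_assoc_established hook labels the association on the connecting side once the COOKIE ACK arrives, with the shared work factored into selinux_sctp_process_new_assoc(). Any LSM can implement the new hook; a hypothetical minimal module (the "demo" name is made up here, DEFINE_LSM boilerplate omitted) would wire it up like this:

    #include <linux/lsm_hooks.h>
    #include <net/sctp/structs.h>

    static int demo_sctp_assoc_established(struct sctp_association *asoc,
    				       struct sk_buff *skb)
    {
    	/* inspect or label the client-side association; 0 means allow */
    	return 0;
    }

    static struct security_hook_list demo_hooks[] __lsm_ro_after_init = {
    	LSM_HOOK_INIT(sctp_assoc_established, demo_sctp_assoc_established),
    };

    static int __init demo_lsm_init(void)
    {
    	security_add_hooks(demo_hooks, ARRAY_SIZE(demo_hooks), "demo");
    	return 0;
    }

Returning a nonzero error from the hook aborts the association setup, which is why security_sctp_assoc_established() above dispatches it through call_int_hook() rather than as a void hook.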
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 90697317895fb..c576832febc67 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -347,7 +347,7 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, int rc; struct xfrm_sec_ctx *ctx; char *ctx_str = NULL; - int str_len; + u32 str_len; if (!polsec) return 0; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 14b279cc75c96..6207762dbdb13 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2510,7 +2510,7 @@ static int smk_ipv6_check(struct smack_known *subject, #ifdef CONFIG_AUDIT smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); ad.a.u.net->family = PF_INET6; - ad.a.u.net->dport = ntohs(address->sin6_port); + ad.a.u.net->dport = address->sin6_port; if (act == SMK_RECEIVING) ad.a.u.net->v6info.saddr = address->sin6_addr; else diff --git a/security/tomoyo/load_policy.c b/security/tomoyo/load_policy.c index 3445ae6fd4794..363b65be87ab7 100644 --- a/security/tomoyo/load_policy.c +++ b/security/tomoyo/load_policy.c @@ -24,7 +24,7 @@ static const char *tomoyo_loader; static int __init tomoyo_loader_setup(char *str) { tomoyo_loader = str; - return 0; + return 1; } __setup("TOMOYO_loader=", tomoyo_loader_setup); @@ -64,7 +64,7 @@ static const char *tomoyo_trigger; static int __init tomoyo_trigger_setup(char *str) { tomoyo_trigger = str; - return 0; + return 1; } __setup("TOMOYO_trigger=", tomoyo_trigger_setup); diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 3ee9edf858156..f158f0abd25d8 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -774,6 +774,11 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, if (oss_period_size < 16) return -EINVAL; + + /* don't allocate too large period; 1MB period must be enough */ + if (oss_period_size > 1024 * 1024) + return -ENOMEM; + runtime->oss.period_bytes = oss_period_size; runtime->oss.period_frames = 1; runtime->oss.periods = oss_periods; @@ -1043,10 +1048,9 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) goto failure; } #endif - oss_period_size *= oss_frame_size; - - oss_buffer_size = oss_period_size * runtime->oss.periods; - if (oss_buffer_size < 0) { + oss_period_size = array_size(oss_period_size, oss_frame_size); + oss_buffer_size = array_size(oss_period_size, runtime->oss.periods); + if (oss_buffer_size <= 0) { err = -EINVAL; goto failure; } diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 061ba06bc9262..82e180c776ae1 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -62,7 +62,10 @@ static int snd_pcm_plugin_alloc(struct snd_pcm_plugin *plugin, snd_pcm_uframes_t width = snd_pcm_format_physical_width(format->format); if (width < 0) return width; - size = frames * format->channels * width; + size = array3_size(frames, format->channels, width); + /* check for too large period size once again */ + if (size > 1024 * 1024) + return -ENOMEM; if (snd_BUG_ON(size % 8)) return -ENXIO; size /= 8;
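The two OSS fixes above replace open-coded multiplications with the saturating helpers from <linux/overflow.h>: array_size() and array3_size() return SIZE_MAX instead of wrapping on overflow, so a single upper-bound comparison rejects both genuinely oversized and overflowed requests. A self-contained sketch of the pattern (DEMO_MAX_PERIOD_BYTES is an illustrative cap, not taken from the patch):

    #include <linux/errno.h>
    #include <linux/overflow.h>

    #define DEMO_MAX_PERIOD_BYTES	(1024 * 1024)

    static int demo_period_bytes(size_t frames, size_t channels,
    			     size_t bytes_per_sample, size_t *out)
    {
    	size_t sz = array3_size(frames, channels, bytes_per_sample);

    	/* an overflowed product saturates to SIZE_MAX and fails here too */
    	if (sz == 0 || sz > DEMO_MAX_PERIOD_BYTES)
    		return -ENOMEM;

    	*out = sz;
    	return 0;
    }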
diff --git a/sound/core/pcm.c b/sound/core/pcm.c index ba4a987ed1c62..977d54320a5ca 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -969,6 +969,8 @@ int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream, init_waitqueue_head(&runtime->tsleep); runtime->status->state = SNDRV_PCM_STATE_OPEN; + mutex_init(&runtime->buffer_mutex); + atomic_set(&runtime->buffer_accessing, 0); substream->runtime = runtime; substream->private_data = pcm->private_data; @@ -1002,6 +1004,7 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream) } else { substream->runtime = NULL; } + mutex_destroy(&runtime->buffer_mutex); kfree(runtime); put_pid(substream->pid); substream->pid = NULL; diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index f2090025236b9..1fc7c50ffa625 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -2273,6 +2273,10 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, err = -EINVAL; goto _end_unlock; } + if (!atomic_inc_unless_negative(&runtime->buffer_accessing)) { + err = -EBUSY; + goto _end_unlock; + } snd_pcm_stream_unlock_irq(substream); if (!is_playback) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_CPU); @@ -2281,6 +2285,7 @@ snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, if (is_playback) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); snd_pcm_stream_lock_irq(substream); + atomic_dec(&runtime->buffer_accessing); if (err < 0) goto _end_unlock; err = pcm_accessible_state(runtime); diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c index b70ce3b69ab4d..8848d2f3160d8 100644 --- a/sound/core/pcm_memory.c +++ b/sound/core/pcm_memory.c @@ -163,19 +163,20 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, size_t size; struct snd_dma_buffer new_dmab; + mutex_lock(&substream->pcm->open_mutex); if (substream->runtime) { buffer->error = -EBUSY; - return; + goto unlock; } if (!snd_info_get_line(buffer, line, sizeof(line))) { snd_info_get_str(str, line, sizeof(str)); size = simple_strtoul(str, NULL, 10) * 1024; if ((size != 0 && size < 8192) || size > substream->dma_max) { buffer->error = -EINVAL; - return; + goto unlock; } if (substream->dma_buffer.bytes == size) - return; + goto unlock; memset(&new_dmab, 0, sizeof(new_dmab)); new_dmab.dev = substream->dma_buffer.dev; if (size > 0) { @@ -189,7 +190,7 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, substream->pcm->card->number, substream->pcm->device, substream->stream ?
'c' : 'p', substream->number, substream->pcm->name, size); - return; + goto unlock; } substream->buffer_bytes_max = size; } else { @@ -201,6 +202,8 @@ static void snd_pcm_lib_preallocate_proc_write(struct snd_info_entry *entry, } else { buffer->error = -EINVAL; } + unlock: + mutex_unlock(&substream->pcm->open_mutex); } static inline void preallocate_info_init(struct snd_pcm_substream *substream) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index a056b3ef3c843..4adaee62ef333 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -685,6 +685,30 @@ static int snd_pcm_hw_params_choose(struct snd_pcm_substream *pcm, return 0; } +/* acquire buffer_mutex; if it's in r/w operation, return -EBUSY, otherwise + * block the further r/w operations + */ +static int snd_pcm_buffer_access_lock(struct snd_pcm_runtime *runtime) +{ + if (!atomic_dec_unless_positive(&runtime->buffer_accessing)) + return -EBUSY; + mutex_lock(&runtime->buffer_mutex); + return 0; /* keep buffer_mutex, unlocked by below */ +} + +/* release buffer_mutex and clear r/w access flag */ +static void snd_pcm_buffer_access_unlock(struct snd_pcm_runtime *runtime) +{ + mutex_unlock(&runtime->buffer_mutex); + atomic_inc(&runtime->buffer_accessing); +} + +#if IS_ENABLED(CONFIG_SND_PCM_OSS) +#define is_oss_stream(substream) ((substream)->oss.oss) +#else +#define is_oss_stream(substream) false +#endif + static int snd_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { @@ -696,22 +720,25 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; + err = snd_pcm_buffer_access_lock(runtime); + if (err < 0) + return err; snd_pcm_stream_lock_irq(substream); switch (runtime->status->state) { case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: + if (!is_oss_stream(substream) && + atomic_read(&substream->mmap_count)) + err = -EBADFD; break; default: - snd_pcm_stream_unlock_irq(substream); - return -EBADFD; + err = -EBADFD; + break; } snd_pcm_stream_unlock_irq(substream); -#if IS_ENABLED(CONFIG_SND_PCM_OSS) - if (!substream->oss.oss) -#endif - if (atomic_read(&substream->mmap_count)) - return -EBADFD; + if (err) + goto unlock; snd_pcm_sync_stop(substream, true); @@ -799,16 +826,21 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, if (usecs >= 0) cpu_latency_qos_add_request(&substream->latency_pm_qos_req, usecs); - return 0; + err = 0; _error: - /* hardware might be unusable from this time, - so we force application to retry to set - the correct hardware parameter settings */ - snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); - if (substream->ops->hw_free != NULL) - substream->ops->hw_free(substream); - if (substream->managed_buffer_alloc) - snd_pcm_lib_free_pages(substream); + if (err) { + /* hardware might be unusable from this time, + * so we force application to retry to set + * the correct hardware parameter settings + */ + snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); + if (substream->ops->hw_free != NULL) + substream->ops->hw_free(substream); + if (substream->managed_buffer_alloc) + snd_pcm_lib_free_pages(substream); + } + unlock: + snd_pcm_buffer_access_unlock(runtime); return err; } @@ -848,26 +880,33 @@ static int do_hw_free(struct snd_pcm_substream *substream) static int snd_pcm_hw_free(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; - int result; + int result = 0; if (PCM_RUNTIME_CHECK(substream)) 
return -ENXIO; runtime = substream->runtime; + result = snd_pcm_buffer_access_lock(runtime); + if (result < 0) + return result; snd_pcm_stream_lock_irq(substream); switch (runtime->status->state) { case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: + if (atomic_read(&substream->mmap_count)) + result = -EBADFD; break; default: - snd_pcm_stream_unlock_irq(substream); - return -EBADFD; + result = -EBADFD; + break; } snd_pcm_stream_unlock_irq(substream); - if (atomic_read(&substream->mmap_count)) - return -EBADFD; + if (result) + goto unlock; result = do_hw_free(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); cpu_latency_qos_remove_request(&substream->latency_pm_qos_req); + unlock: + snd_pcm_buffer_access_unlock(runtime); return result; } @@ -1173,15 +1212,17 @@ struct action_ops { static int snd_pcm_action_group(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state, - bool do_lock) + bool stream_lock) { struct snd_pcm_substream *s = NULL; struct snd_pcm_substream *s1; int res = 0, depth = 1; snd_pcm_group_for_each_entry(s, substream) { - if (do_lock && s != substream) { - if (s->pcm->nonatomic) + if (s != substream) { + if (!stream_lock) + mutex_lock_nested(&s->runtime->buffer_mutex, depth); + else if (s->pcm->nonatomic) mutex_lock_nested(&s->self_group.mutex, depth); else spin_lock_nested(&s->self_group.lock, depth); @@ -1209,18 +1250,18 @@ static int snd_pcm_action_group(const struct action_ops *ops, ops->post_action(s, state); } _unlock: - if (do_lock) { - /* unlock streams */ - snd_pcm_group_for_each_entry(s1, substream) { - if (s1 != substream) { - if (s1->pcm->nonatomic) - mutex_unlock(&s1->self_group.mutex); - else - spin_unlock(&s1->self_group.lock); - } - if (s1 == s) /* end */ - break; + /* unlock streams */ + snd_pcm_group_for_each_entry(s1, substream) { + if (s1 != substream) { + if (!stream_lock) + mutex_unlock(&s1->runtime->buffer_mutex); + else if (s1->pcm->nonatomic) + mutex_unlock(&s1->self_group.mutex); + else + spin_unlock(&s1->self_group.lock); } + if (s1 == s) /* end */ + break; } return res; } @@ -1350,10 +1391,15 @@ static int snd_pcm_action_nonatomic(const struct action_ops *ops, /* Guarantee the group members won't change during non-atomic action */ down_read(&snd_pcm_link_rwsem); + res = snd_pcm_buffer_access_lock(substream->runtime); + if (res < 0) + goto unlock; if (snd_pcm_stream_linked(substream)) res = snd_pcm_action_group(ops, substream, state, false); else res = snd_pcm_action_single(ops, substream, state); + snd_pcm_buffer_access_unlock(substream->runtime); + unlock: up_read(&snd_pcm_link_rwsem); return res; } @@ -1843,11 +1889,13 @@ static int snd_pcm_do_reset(struct snd_pcm_substream *substream, int err = snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_RESET, NULL); if (err < 0) return err; + snd_pcm_stream_lock_irq(substream); runtime->hw_ptr_base = 0; runtime->hw_ptr_interrupt = runtime->status->hw_ptr - runtime->status->hw_ptr % runtime->period_size; runtime->silence_start = runtime->status->hw_ptr; runtime->silence_filled = 0; + snd_pcm_stream_unlock_irq(substream); return 0; } @@ -1855,10 +1903,12 @@ static void snd_pcm_post_reset(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; + snd_pcm_stream_lock_irq(substream); runtime->control->appl_ptr = runtime->status->hw_ptr; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) snd_pcm_playback_silence(substream, ULONG_MAX); + 
snd_pcm_stream_unlock_irq(substream); } static const struct action_ops snd_pcm_action_reset = { diff --git a/sound/firewire/fcp.c b/sound/firewire/fcp.c index bbfbebf4affbc..df44dd5dc4b22 100644 --- a/sound/firewire/fcp.c +++ b/sound/firewire/fcp.c @@ -240,9 +240,7 @@ int fcp_avc_transaction(struct fw_unit *unit, t.response_match_bytes = response_match_bytes; t.state = STATE_PENDING; init_waitqueue_head(&t.wait); - - if (*(const u8 *)command == 0x00 || *(const u8 *)command == 0x03) - t.deferrable = true; + t.deferrable = (*(const u8 *)command == 0x00 || *(const u8 *)command == 0x03); spin_lock_irq(&transactions_lock); list_add_tail(&t.list, &transactions); diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index 4fb90ceb4053b..70fd8b13938ed 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -11,6 +11,7 @@ #include <sound/core.h> #include <sound/intel-dsp-config.h> #include <sound/intel-nhlt.h> +#include <sound/soc-acpi.h> static int dsp_driver; @@ -31,7 +32,12 @@ struct config_entry { u16 device; u8 acpi_hid[ACPI_ID_LEN]; const struct dmi_system_id *dmi_table; - u8 codec_hid[ACPI_ID_LEN]; + const struct snd_soc_acpi_codecs *codec_hid; +}; + +static const struct snd_soc_acpi_codecs __maybe_unused essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, }; /* @@ -77,7 +83,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0x5a98, - .codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, #endif #if IS_ENABLED(CONFIG_SND_SOC_INTEL_APL) @@ -163,7 +169,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0x3198, - .codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, #endif @@ -193,6 +199,11 @@ static const struct config_entry config_table[] = { {} } }, + { + .flags = FLAG_SOF, + .device = 0x09dc8, + .codec_hid = &essx_83x6, + }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x9dc8, @@ -251,7 +262,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0x02c8, - .codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, @@ -280,7 +291,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0x06c8, - .codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, @@ -327,7 +338,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0x4dc8, - .codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC, @@ -353,7 +364,7 @@ static const struct config_entry config_table[] = { { .flags = FLAG_SOF, .device = 0xa0c8, - .codec_hid = "ESSX8336", + .codec_hid = &essx_83x6, }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, @@ -414,8 +425,15 @@ static const struct config_entry *snd_intel_dsp_find_config continue; if (table->dmi_table && !dmi_check_system(table->dmi_table)) continue; - if (table->codec_hid[0] && !acpi_dev_present(table->codec_hid, NULL, -1)) - continue; + if (table->codec_hid) { + int i; + + for (i = 0; i < table->codec_hid->num_codecs; i++) + if (acpi_dev_present(table->codec_hid->codecs[i], NULL, -1)) + break; + if (i == table->codec_hid->num_codecs) + continue; + } return table; } return NULL; }
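With codec_hid now a pointer to a struct snd_soc_acpi_codecs list, a single config_table entry covers all three ESS variants (ESSX8316/8326/8336) instead of needing one HID string per entry; the lookup accepts an entry as soon as any listed HID is present in the ACPI namespace. The matching step reduces to something like the sketch below (demo_codec_listed() is an illustrative name, not a function in the patch):

    #include <linux/acpi.h>
    #include <sound/soc-acpi.h>

    static bool demo_codec_listed(const struct snd_soc_acpi_codecs *ids)
    {
    	int i;

    	for (i = 0; i < ids->num_codecs; i++)
    		if (acpi_dev_present(ids->codecs[i], NULL, -1))
    			return true;	/* e.g. ESSX8326 enumerated */

    	return false;
    }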
diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index 128476aa7c61d..4063da3782833 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -130,6 +130,28 @@ bool intel_nhlt_has_endpoint_type(struct nhlt_acpi_table *nhlt, u8 link_type) } EXPORT_SYMBOL(intel_nhlt_has_endpoint_type); +int intel_nhlt_ssp_endpoint_mask(struct nhlt_acpi_table *nhlt, u8 device_type) +{ + struct nhlt_endpoint *epnt; + int ssp_mask = 0; + int i; + + if (!nhlt || (device_type != NHLT_DEVICE_BT && device_type != NHLT_DEVICE_I2S)) + return 0; + + epnt = (struct nhlt_endpoint *)nhlt->desc; + for (i = 0; i < nhlt->endpoint_count; i++) { + if (epnt->linktype == NHLT_LINK_SSP && epnt->device_type == device_type) { + /* for SSP the virtual bus id is the SSP port */ + ssp_mask |= BIT(epnt->virtual_bus_id); + } + epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length); + } + + return ssp_mask; +} +EXPORT_SYMBOL(intel_nhlt_ssp_endpoint_mask); + static struct nhlt_specific_cfg * nhlt_get_specific_cfg(struct device *dev, struct nhlt_fmt *fmt, u8 num_ch, u32 rate, u8 vbps, u8 bps) diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index b6bdebd9ef275..10112e1bb25dc 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -494,7 +494,7 @@ static int snd_cs423x_pnpbios_detect(struct pnp_dev *pdev, static int dev; int err; struct snd_card *card; - struct pnp_dev *cdev; + struct pnp_dev *cdev, *iter; char cid[PNP_ID_LEN]; if (pnp_device_is_isapnp(pdev)) @@ -510,9 +510,11 @@ static int snd_cs423x_pnpbios_detect(struct pnp_dev *pdev, strcpy(cid, pdev->id[0].id); cid[5] = '1'; cdev = NULL; - list_for_each_entry(cdev, &(pdev->protocol->devices), protocol_list) { - if (!strcmp(cdev->id[0].id, cid)) + list_for_each_entry(iter, &(pdev->protocol->devices), protocol_list) { + if (!strcmp(iter->id[0].id, cid)) { + cdev = iter; break; + } } err = snd_cs423x_card_new(&pdev->dev, dev, &card); if (err < 0) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 01f296d524ce6..cb60a07d39a8e 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -938,8 +938,8 @@ static int snd_ac97_ad18xx_pcm_get_volume(struct snd_kcontrol *kcontrol, struct int codec = kcontrol->private_value & 3; mutex_lock(&ac97->page_mutex); - ucontrol->value.integer.value[0] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 0) & 31); - ucontrol->value.integer.value[1] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 8) & 31); + ucontrol->value.integer.value[0] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 8) & 31); + ucontrol->value.integer.value[1] = 31 - ((ac97->spec.ad18xx.pcmreg[codec] >> 0) & 31); mutex_unlock(&ac97->page_mutex); return 0; } diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index 9a678b5cf2855..dab801d9d3b48 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -298,7 +298,6 @@ MODULE_PARM_DESC(joystick_port, "Joystick port address."); #define CM_MICGAINZ 0x01 /* mic boost */ #define CM_MICGAINZ_SHIFT 0 -#define CM_REG_MIXER3 0x24 #define CM_REG_AUX_VOL 0x26 #define CM_VAUXL_MASK 0xf0 #define CM_VAUXR_MASK 0x0f @@ -3265,7 +3264,7 @@ static int snd_cmipci_probe(struct pci_dev *pci, */ static const unsigned char saved_regs[] = { CM_REG_FUNCTRL1, CM_REG_CHFORMAT, CM_REG_LEGACY_CTRL, CM_REG_MISC_CTRL, - CM_REG_MIXER0, CM_REG_MIXER1, CM_REG_MIXER2, CM_REG_MIXER3, CM_REG_PLL, + CM_REG_MIXER0, CM_REG_MIXER1, CM_REG_MIXER2, CM_REG_AUX_VOL, CM_REG_PLL, CM_REG_CH0_FRAME1, CM_REG_CH0_FRAME2, CM_REG_CH1_FRAME1, CM_REG_CH1_FRAME2, CM_REG_EXT_MISC, CM_REG_INT_STATUS, CM_REG_INT_HLDCLR, CM_REG_FUNCTRL0, diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 572ff0d1fafee..8eff25d2d9e67 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2066,14 +2066,16 @@ static const struct
hda_controller_ops pci_hda_ops = { .position_check = azx_position_check, }; +static DECLARE_BITMAP(probed_devs, SNDRV_CARDS); + static int azx_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { - static int dev; struct snd_card *card; struct hda_intel *hda; struct azx *chip; bool schedule_probe; + int dev; int err; if (pci_match_id(driver_denylist, pci)) { @@ -2081,10 +2083,11 @@ static int azx_probe(struct pci_dev *pci, return -ENODEV; } + dev = find_first_zero_bit(probed_devs, SNDRV_CARDS); if (dev >= SNDRV_CARDS) return -ENODEV; if (!enable[dev]) { - dev++; + set_bit(dev, probed_devs); return -ENOENT; } @@ -2151,7 +2154,7 @@ static int azx_probe(struct pci_dev *pci, if (schedule_probe) schedule_delayed_work(&hda->probe_work, 0); - dev++; + set_bit(dev, probed_devs); if (chip->disabled) complete_all(&hda->probe_wait); return 0; @@ -2374,6 +2377,7 @@ static void azx_remove(struct pci_dev *pci) cancel_delayed_work_sync(&hda->probe_work); device_lock(&pci->dev); + clear_bit(chip->dev_index, probed_devs); pci_set_drvdata(pci, NULL); snd_card_free(card); } diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 92df4f243ec65..cf4f277dccdda 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1617,6 +1617,7 @@ static void hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, struct hda_codec *codec = per_pin->codec; struct hdmi_spec *spec = codec->spec; struct hdmi_eld *eld = &spec->temp_eld; + struct device *dev = hda_codec_dev(codec); hda_nid_t pin_nid = per_pin->pin_nid; int dev_id = per_pin->dev_id; /* @@ -1630,8 +1631,13 @@ static void hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, int present; int ret; +#ifdef CONFIG_PM + if (dev->power.runtime_status == RPM_SUSPENDING) + return; +#endif + ret = snd_hda_power_up_pm(codec); - if (ret < 0 && pm_runtime_suspended(hda_codec_dev(codec))) + if (ret < 0 && pm_runtime_suspended(dev)) goto out; present = snd_hda_jack_pin_sense(codec, pin_nid, dev_id); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3a42457984e98..16e90524a4977 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3617,8 +3617,8 @@ static void alc256_shutup(struct hda_codec *codec) /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly * when booting with headset plugged. 
So skip setting it for the codec alc257 */ - if (spec->codec_variant != ALC269_TYPE_ALC257 && - spec->codec_variant != ALC269_TYPE_ALC256) + if (codec->core.vendor_id != 0x10ec0236 && + codec->core.vendor_id != 0x10ec0257) alc_update_coef_idx(codec, 0x46, 0, 3 << 12); if (!spec->no_shutup_pins) @@ -6948,6 +6948,7 @@ enum { ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, + ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS, ALC269VC_FIXUP_ACER_HEADSET_MIC, @@ -8273,6 +8274,14 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x08}, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2fcf}, + { } + }, + }, [ALC295_FIXUP_ASUS_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -9020,6 +9029,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), @@ -9053,6 +9063,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), @@ -9103,6 +9114,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x8561, "Clevo NH[57][0-9][ER][ACDH]Q", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[57][0-9]RZ[Q]", ALC269_FIXUP_DMIC), SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x866d, "Clevo NP5[05]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x867d, "Clevo NP7[01]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME), SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -9397,6 +9410,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"}, {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"}, {.id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc298-samsung-headphone"}, + {.id = ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = 
"alc256-samsung-headphone"}, {.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"}, {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"}, {.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"}, @@ -11067,6 +11081,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2), + SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index cd05ee2802c9e..5247015e8b316 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c @@ -556,6 +556,8 @@ int acp_legacy_dai_links_create(struct snd_soc_card *card) num_links++; links = devm_kzalloc(dev, sizeof(struct snd_soc_dai_link) * num_links, GFP_KERNEL); + if (!links) + return -ENOMEM; if (drv_data->hs_cpu_id == I2S_SP) { links[i].name = "acp-headset-codec"; diff --git a/sound/soc/amd/vangogh/acp5x-mach.c b/sound/soc/amd/vangogh/acp5x-mach.c index 14cf325e4b237..5d7a17755fa7f 100644 --- a/sound/soc/amd/vangogh/acp5x-mach.c +++ b/sound/soc/amd/vangogh/acp5x-mach.c @@ -165,6 +165,7 @@ static int acp5x_cs35l41_hw_params(struct snd_pcm_substream *substream, unsigned int num_codecs = rtd->num_codecs; unsigned int bclk_val; + ret = 0; for (i = 0; i < num_codecs; i++) { codec_dai = asoc_rtd_to_codec(rtd, i); if ((strcmp(codec_dai->name, "spi-VLV1776:00") == 0) || diff --git a/sound/soc/amd/vangogh/acp5x-pcm-dma.c b/sound/soc/amd/vangogh/acp5x-pcm-dma.c index f10de38976cb5..bfca4cf423cf1 100644 --- a/sound/soc/amd/vangogh/acp5x-pcm-dma.c +++ b/sound/soc/amd/vangogh/acp5x-pcm-dma.c @@ -281,7 +281,7 @@ static int acp5x_dma_hw_params(struct snd_soc_component *component, return -EINVAL; } size = params_buffer_bytes(params); - rtd->dma_addr = substream->dma_buffer.addr; + rtd->dma_addr = substream->runtime->dma_addr; rtd->num_pages = (PAGE_ALIGN(size) >> PAGE_SHIFT); config_acp5x_dma(rtd, substream->stream); return 0; @@ -426,51 +426,51 @@ static int acp5x_audio_remove(struct platform_device *pdev) static int __maybe_unused acp5x_pcm_resume(struct device *dev) { struct i2s_dev_data *adata; - u32 val, reg_val, frmt_val; + struct i2s_stream_instance *rtd; + u32 val; - reg_val = 0; - frmt_val = 0; adata = dev_get_drvdata(dev); if (adata->play_stream && adata->play_stream->runtime) { - struct i2s_stream_instance *rtd = - adata->play_stream->runtime->private_data; + rtd = adata->play_stream->runtime->private_data; config_acp5x_dma(rtd, SNDRV_PCM_STREAM_PLAYBACK); - switch (rtd->i2s_instance) { - case I2S_HS_INSTANCE: - reg_val = ACP_HSTDM_ITER; - frmt_val = ACP_HSTDM_TXFRMT; - break; - case I2S_SP_INSTANCE: - default: - reg_val = ACP_I2STDM_ITER; - frmt_val = ACP_I2STDM_TXFRMT; + acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_HSTDM_ITER); + if (adata->tdm_mode == TDM_ENABLE) { + acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_HSTDM_TXFRMT); + val = acp_readl(adata->acp5x_base + ACP_HSTDM_ITER); + acp_writel(val | 0x2, adata->acp5x_base + ACP_HSTDM_ITER); + } + } + if (adata->i2ssp_play_stream && adata->i2ssp_play_stream->runtime) { + rtd = 
adata->i2ssp_play_stream->runtime->private_data; + config_acp5x_dma(rtd, SNDRV_PCM_STREAM_PLAYBACK); + acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_I2STDM_ITER); + if (adata->tdm_mode == TDM_ENABLE) { + acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_I2STDM_TXFRMT); + val = acp_readl(adata->acp5x_base + ACP_I2STDM_ITER); + acp_writel(val | 0x2, adata->acp5x_base + ACP_I2STDM_ITER); } - acp_writel((rtd->xfer_resolution << 3), - rtd->acp5x_base + reg_val); } if (adata->capture_stream && adata->capture_stream->runtime) { - struct i2s_stream_instance *rtd = - adata->capture_stream->runtime->private_data; + rtd = adata->capture_stream->runtime->private_data; config_acp5x_dma(rtd, SNDRV_PCM_STREAM_CAPTURE); - switch (rtd->i2s_instance) { - case I2S_HS_INSTANCE: - reg_val = ACP_HSTDM_IRER; - frmt_val = ACP_HSTDM_RXFRMT; - break; - case I2S_SP_INSTANCE: - default: - reg_val = ACP_I2STDM_IRER; - frmt_val = ACP_I2STDM_RXFRMT; + acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_HSTDM_IRER); + if (adata->tdm_mode == TDM_ENABLE) { + acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_HSTDM_RXFRMT); + val = acp_readl(adata->acp5x_base + ACP_HSTDM_IRER); + acp_writel(val | 0x2, adata->acp5x_base + ACP_HSTDM_IRER); } - acp_writel((rtd->xfer_resolution << 3), - rtd->acp5x_base + reg_val); } - if (adata->tdm_mode == TDM_ENABLE) { - acp_writel(adata->tdm_fmt, adata->acp5x_base + frmt_val); - val = acp_readl(adata->acp5x_base + reg_val); - acp_writel(val | 0x2, adata->acp5x_base + reg_val); + if (adata->i2ssp_capture_stream && adata->i2ssp_capture_stream->runtime) { + rtd = adata->i2ssp_capture_stream->runtime->private_data; + config_acp5x_dma(rtd, SNDRV_PCM_STREAM_CAPTURE); + acp_writel((rtd->xfer_resolution << 3), rtd->acp5x_base + ACP_I2STDM_IRER); + if (adata->tdm_mode == TDM_ENABLE) { + acp_writel(adata->tdm_fmt, adata->acp5x_base + ACP_I2STDM_RXFRMT); + val = acp_readl(adata->acp5x_base + ACP_I2STDM_IRER); + acp_writel(val | 0x2, adata->acp5x_base + ACP_I2STDM_IRER); + } } acp_writel(1, adata->acp5x_base + ACP_EXTERNAL_INTR_ENB); return 0; diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index 26e2bc690d86e..c1dea8d624164 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -280,7 +280,10 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream, /* Enable PMC peripheral clock for this SSC */ pr_debug("atmel_ssc_dai: Starting clock\n"); - clk_enable(ssc_p->ssc->clk); + ret = clk_enable(ssc_p->ssc->clk); + if (ret) + return ret; + ssc_p->mck_rate = clk_get_rate(ssc_p->ssc->clk); /* Reset the SSC unless initialized to keep it in a clean state */ diff --git a/sound/soc/atmel/mikroe-proto.c b/sound/soc/atmel/mikroe-proto.c index 627564c18c270..ce46d8a0b7e43 100644 --- a/sound/soc/atmel/mikroe-proto.c +++ b/sound/soc/atmel/mikroe-proto.c @@ -115,7 +115,8 @@ static int snd_proto_probe(struct platform_device *pdev) cpu_np = of_parse_phandle(np, "i2s-controller", 0); if (!cpu_np) { dev_err(&pdev->dev, "i2s-controller missing\n"); - return -EINVAL; + ret = -EINVAL; + goto put_codec_node; } dai->cpus->of_node = cpu_np; dai->platforms->of_node = cpu_np; @@ -125,7 +126,8 @@ static int snd_proto_probe(struct platform_device *pdev) &bitclkmaster, &framemaster); if (bitclkmaster != framemaster) { dev_err(&pdev->dev, "Must be the same bitclock and frame master\n"); - return -EINVAL; + ret = -EINVAL; + goto put_cpu_node; } if (bitclkmaster) { if (codec_np == bitclkmaster) @@ -136,18 +138,20 @@ static int 
snd_proto_probe(struct platform_device *pdev) dai_fmt |= snd_soc_daifmt_parse_clock_provider_as_flag(np, NULL); } - of_node_put(bitclkmaster); - of_node_put(framemaster); - dai->dai_fmt = dai_fmt; - - of_node_put(codec_np); - of_node_put(cpu_np); + dai->dai_fmt = dai_fmt; ret = snd_soc_register_card(&snd_proto); if (ret) dev_err_probe(&pdev->dev, ret, "snd_soc_register_card() failed\n"); + +put_cpu_node: + of_node_put(bitclkmaster); + of_node_put(framemaster); + of_node_put(cpu_np); +put_codec_node: + of_node_put(codec_np); return ret; } diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c index 915da92e1ec82..33e43013ff770 100644 --- a/sound/soc/atmel/sam9g20_wm8731.c +++ b/sound/soc/atmel/sam9g20_wm8731.c @@ -214,6 +214,7 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev) cpu_np = of_parse_phandle(np, "atmel,ssc-controller", 0); if (!cpu_np) { dev_err(&pdev->dev, "dai and pcm info missing\n"); + of_node_put(codec_np); return -EINVAL; } at91sam9g20ek_dai.cpus->of_node = cpu_np; diff --git a/sound/soc/atmel/sam9x5_wm8731.c b/sound/soc/atmel/sam9x5_wm8731.c index 7c45dc4f8c1bb..99310e40e7a62 100644 --- a/sound/soc/atmel/sam9x5_wm8731.c +++ b/sound/soc/atmel/sam9x5_wm8731.c @@ -142,7 +142,7 @@ static int sam9x5_wm8731_driver_probe(struct platform_device *pdev) if (!cpu_np) { dev_err(&pdev->dev, "atmel,ssc-controller node missing\n"); ret = -EINVAL; - goto out; + goto out_put_codec_np; } dai->cpus->of_node = cpu_np; dai->platforms->of_node = cpu_np; @@ -153,12 +153,9 @@ static int sam9x5_wm8731_driver_probe(struct platform_device *pdev) if (ret != 0) { dev_err(&pdev->dev, "Failed to set SSC %d for audio: %d\n", ret, priv->ssc_id); - goto out; + goto out_put_cpu_np; } - of_node_put(codec_np); - of_node_put(cpu_np); - ret = devm_snd_soc_register_card(&pdev->dev, card); if (ret) { dev_err(&pdev->dev, "Platform device allocation failed\n"); @@ -167,10 +164,14 @@ static int sam9x5_wm8731_driver_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "%s ok\n", __func__); - return ret; + goto out_put_cpu_np; out_put_audio: atmel_ssc_put_audio(priv->ssc_id); +out_put_cpu_np: + of_node_put(cpu_np); +out_put_codec_np: + of_node_put(codec_np); out: return ret; } diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index d3e5ae8310ef2..30c00380499cd 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -733,6 +733,7 @@ config SND_SOC_CS4349 config SND_SOC_CS47L15 tristate + depends on MFD_CS47L15 config SND_SOC_CS47L24 tristate @@ -740,15 +741,19 @@ config SND_SOC_CS47L24 config SND_SOC_CS47L35 tristate + depends on MFD_CS47L35 config SND_SOC_CS47L85 tristate + depends on MFD_CS47L85 config SND_SOC_CS47L90 tristate + depends on MFD_CS47L90 config SND_SOC_CS47L92 tristate + depends on MFD_CS47L92 # Cirrus Logic Quad-Channel ADC config SND_SOC_CS53L30 diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c index 77a0176946459..f3787d77f892b 100644 --- a/sound/soc/codecs/cs35l41.c +++ b/sound/soc/codecs/cs35l41.c @@ -1035,8 +1035,8 @@ static int cs35l41_irq_gpio_config(struct cs35l41_private *cs35l41) regmap_update_bits(cs35l41->regmap, CS35L41_GPIO2_CTRL1, CS35L41_GPIO_POL_MASK | CS35L41_GPIO_DIR_MASK, - irq_gpio_cfg1->irq_pol_inv << CS35L41_GPIO_POL_SHIFT | - !irq_gpio_cfg1->irq_out_en << CS35L41_GPIO_DIR_SHIFT); + irq_gpio_cfg2->irq_pol_inv << CS35L41_GPIO_POL_SHIFT | + !irq_gpio_cfg2->irq_out_en << CS35L41_GPIO_DIR_SHIFT); regmap_update_bits(cs35l41->regmap, CS35L41_GPIO_PAD_CONTROL, CS35L41_GPIO1_CTRL_MASK | 
CS35L41_GPIO2_CTRL_MASK, @@ -1091,7 +1091,7 @@ static struct snd_soc_dai_driver cs35l41_dai[] = { .capture = { .stream_name = "AMP Capture", .channels_min = 1, - .channels_max = 8, + .channels_max = 4, .rates = SNDRV_PCM_RATE_KNOT, .formats = CS35L41_TX_FORMATS, }, diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 43d98bdb5b5b0..2c294868008ed 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -1637,7 +1637,11 @@ static irqreturn_t cs42l42_irq_thread(int irq, void *data) mutex_lock(&cs42l42->jack_detect_mutex); - /* Check auto-detect status */ + /* + * Check auto-detect status. Don't assume a previous unplug event has + * cleared the flags. If the jack is unplugged and plugged during + * system suspend there won't have been an unplug event. + */ if ((~masks[5]) & irq_params_table[5].mask) { if (stickies[5] & CS42L42_HSDET_AUTO_DONE_MASK) { cs42l42_process_hs_type_detect(cs42l42); @@ -1645,11 +1649,15 @@ static irqreturn_t cs42l42_irq_thread(int irq, void *data) case CS42L42_PLUG_CTIA: case CS42L42_PLUG_OMTP: snd_soc_jack_report(cs42l42->jack, SND_JACK_HEADSET, - SND_JACK_HEADSET); + SND_JACK_HEADSET | + SND_JACK_BTN_0 | SND_JACK_BTN_1 | + SND_JACK_BTN_2 | SND_JACK_BTN_3); break; case CS42L42_PLUG_HEADPHONE: snd_soc_jack_report(cs42l42->jack, SND_JACK_HEADPHONE, - SND_JACK_HEADPHONE); + SND_JACK_HEADSET | + SND_JACK_BTN_0 | SND_JACK_BTN_1 | + SND_JACK_BTN_2 | SND_JACK_BTN_3); break; default: break; diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index 6ffe88345de5f..3a3dc0539d921 100644 --- a/sound/soc/codecs/lpass-rx-macro.c +++ b/sound/soc/codecs/lpass-rx-macro.c @@ -2039,6 +2039,10 @@ static int rx_macro_load_compander_coeff(struct snd_soc_component *component, int i; int hph_pwr_mode; + /* AUX does not have compander */ + if (comp == INTERP_AUX) + return 0; + if (!rx->comp_enabled[comp]) return 0; @@ -2268,7 +2272,7 @@ static int rx_macro_mux_get(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_dapm_to_component(widget->dapm); struct rx_macro *rx = snd_soc_component_get_drvdata(component); - ucontrol->value.integer.value[0] = + ucontrol->value.enumerated.item[0] = rx->rx_port_value[widget->shift]; return 0; } @@ -2280,7 +2284,7 @@ static int rx_macro_mux_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_dapm_to_component(widget->dapm); struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; struct snd_soc_dapm_update *update = NULL; - u32 rx_port_value = ucontrol->value.integer.value[0]; + u32 rx_port_value = ucontrol->value.enumerated.item[0]; u32 aif_rst; struct rx_macro *rx = snd_soc_component_get_drvdata(component); @@ -2392,7 +2396,7 @@ static int rx_macro_get_hph_pwr_mode(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct rx_macro *rx = snd_soc_component_get_drvdata(component); - ucontrol->value.integer.value[0] = rx->hph_pwr_mode; + ucontrol->value.enumerated.item[0] = rx->hph_pwr_mode; return 0; } @@ -2402,7 +2406,7 @@ static int rx_macro_put_hph_pwr_mode(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct rx_macro *rx = snd_soc_component_get_drvdata(component); - rx->hph_pwr_mode = ucontrol->value.integer.value[0]; + rx->hph_pwr_mode = ucontrol->value.enumerated.item[0]; return 0; } @@ -3542,6 +3546,8 @@ static int rx_macro_probe(struct platform_device *pdev) return PTR_ERR(base); rx->regmap = 
devm_regmap_init_mmio(dev, base, &rx_regmap_config); + if (IS_ERR(rx->regmap)) + return PTR_ERR(rx->regmap); dev_set_drvdata(dev, rx); diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index a4c0a155af565..9c96ab1bf84f9 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -1821,6 +1821,8 @@ static int tx_macro_probe(struct platform_device *pdev) } tx->regmap = devm_regmap_init_mmio(dev, base, &tx_regmap_config); + if (IS_ERR(tx->regmap)) + return PTR_ERR(tx->regmap); dev_set_drvdata(dev, tx); diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index 11147e35689b2..e14c277e6a8b6 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -780,7 +780,7 @@ static int va_macro_dec_mode_get(struct snd_kcontrol *kcontrol, struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; int path = e->shift_l; - ucontrol->value.integer.value[0] = va->dec_mode[path]; + ucontrol->value.enumerated.item[0] = va->dec_mode[path]; return 0; } @@ -789,7 +789,7 @@ static int va_macro_dec_mode_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); - int value = ucontrol->value.integer.value[0]; + int value = ucontrol->value.enumerated.item[0]; struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; int path = e->shift_l; struct va_macro *va = snd_soc_component_get_drvdata(comp); diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index 75baf8eb70299..69d2915f40d88 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -2405,6 +2405,8 @@ static int wsa_macro_probe(struct platform_device *pdev) return PTR_ERR(base); wsa->regmap = devm_regmap_init_mmio(dev, base, &wsa_regmap_config); + if (IS_ERR(wsa->regmap)) + return PTR_ERR(wsa->regmap); dev_set_drvdata(dev, wsa); diff --git a/sound/soc/codecs/max98927.c b/sound/soc/codecs/max98927.c index 5ba5f876eab87..fd84780bf689f 100644 --- a/sound/soc/codecs/max98927.c +++ b/sound/soc/codecs/max98927.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include "max98927.h" diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 485cda46dbb9b..e52a559c52d68 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -1222,8 +1222,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) } irq = platform_get_irq_byname(pdev, "mbhc_switch_int"); - if (irq < 0) - return irq; + if (irq < 0) { + ret = irq; + goto err_disable_clk; + } ret = devm_request_threaded_irq(dev, irq, NULL, pm8916_mbhc_switch_irq_handler, @@ -1235,8 +1237,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) if (priv->mbhc_btn_enabled) { irq = platform_get_irq_byname(pdev, "mbhc_but_press_det"); - if (irq < 0) - return irq; + if (irq < 0) { + ret = irq; + goto err_disable_clk; + } ret = devm_request_threaded_irq(dev, irq, NULL, mbhc_btn_press_irq_handler, @@ -1247,8 +1251,10 @@ static int pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) dev_err(dev, "cannot request mbhc button press irq\n"); irq = platform_get_irq_byname(pdev, "mbhc_but_rel_det"); - if (irq < 0) - return irq; + if (irq < 0) { + ret = irq; + goto err_disable_clk; + } ret = devm_request_threaded_irq(dev, irq, NULL, mbhc_btn_release_irq_handler, @@ -1265,6 +1271,10 @@ static int 
pm8916_wcd_analog_spmi_probe(struct platform_device *pdev) return devm_snd_soc_register_component(dev, &pm8916_wcd_analog, pm8916_wcd_analog_dai, ARRAY_SIZE(pm8916_wcd_analog_dai)); + +err_disable_clk: + clk_disable_unprepare(priv->mclk); + return ret; } static int pm8916_wcd_analog_spmi_remove(struct platform_device *pdev) diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c index fcc10c8bc6259..9ad7fc0baf072 100644 --- a/sound/soc/codecs/msm8916-wcd-digital.c +++ b/sound/soc/codecs/msm8916-wcd-digital.c @@ -1201,7 +1201,7 @@ static int msm8916_wcd_digital_probe(struct platform_device *pdev) ret = clk_prepare_enable(priv->mclk); if (ret < 0) { dev_err(dev, "failed to enable mclk %d\n", ret); - return ret; + goto err_clk; } dev_set_drvdata(dev, priv); @@ -1209,6 +1209,9 @@ static int msm8916_wcd_digital_probe(struct platform_device *pdev) return devm_snd_soc_register_component(dev, &msm8916_wcd_digital, msm8916_wcd_digital_dai, ARRAY_SIZE(msm8916_wcd_digital_dai)); +err_clk: + clk_disable_unprepare(priv->ahbclk); + return ret; } static int msm8916_wcd_digital_remove(struct platform_device *pdev) diff --git a/sound/soc/codecs/mt6358.c b/sound/soc/codecs/mt6358.c index 9b263a9a669dc..4c7b5d940799b 100644 --- a/sound/soc/codecs/mt6358.c +++ b/sound/soc/codecs/mt6358.c @@ -107,6 +107,7 @@ int mt6358_set_mtkaif_protocol(struct snd_soc_component *cmpnt, priv->mtkaif_protocol = mtkaif_protocol; return 0; } +EXPORT_SYMBOL_GPL(mt6358_set_mtkaif_protocol); static void playback_gpio_set(struct mt6358_priv *priv) { @@ -273,6 +274,7 @@ int mt6358_mtkaif_calibration_enable(struct snd_soc_component *cmpnt) 1 << RG_AUD_PAD_TOP_DAT_MISO_LOOPBACK_SFT); return 0; } +EXPORT_SYMBOL_GPL(mt6358_mtkaif_calibration_enable); int mt6358_mtkaif_calibration_disable(struct snd_soc_component *cmpnt) { @@ -296,6 +298,7 @@ int mt6358_mtkaif_calibration_disable(struct snd_soc_component *cmpnt) capture_gpio_reset(priv); return 0; } +EXPORT_SYMBOL_GPL(mt6358_mtkaif_calibration_disable); int mt6358_set_mtkaif_calibration_phase(struct snd_soc_component *cmpnt, int phase_1, int phase_2) @@ -310,6 +313,7 @@ int mt6358_set_mtkaif_calibration_phase(struct snd_soc_component *cmpnt, phase_2 << RG_AUD_PAD_TOP_PHASE_MODE2_SFT); return 0; } +EXPORT_SYMBOL_GPL(mt6358_set_mtkaif_calibration_phase); /* dl pga gain */ enum { diff --git a/sound/soc/codecs/rk817_codec.c b/sound/soc/codecs/rk817_codec.c index 03f24edfe4f64..8fffe378618d0 100644 --- a/sound/soc/codecs/rk817_codec.c +++ b/sound/soc/codecs/rk817_codec.c @@ -508,12 +508,14 @@ static int rk817_platform_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "%s() register codec error %d\n", __func__, ret); - goto err_; + goto err_clk; } return 0; -err_: +err_clk: + clk_disable_unprepare(rk817_codec_data->mclk); +err_: return ret; } diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c index 2138f62e6af5d..3a8fba101b20f 100644 --- a/sound/soc/codecs/rt5663.c +++ b/sound/soc/codecs/rt5663.c @@ -3478,6 +3478,8 @@ static int rt5663_parse_dp(struct rt5663_priv *rt5663, struct device *dev) table_size = sizeof(struct impedance_mapping_table) * rt5663->pdata.impedance_sensing_num; rt5663->imp_table = devm_kzalloc(dev, table_size, GFP_KERNEL); + if (!rt5663->imp_table) + return -ENOMEM; ret = device_property_read_u32_array(dev, "realtek,impedance_sensing_table", (u32 *)rt5663->imp_table, table_size); diff --git a/sound/soc/codecs/rt5682s.c b/sound/soc/codecs/rt5682s.c index 1e662d1be2b3e..92b8753f1267b 100644 --- 
a/sound/soc/codecs/rt5682s.c +++ b/sound/soc/codecs/rt5682s.c @@ -822,6 +822,7 @@ static void rt5682s_jack_detect_handler(struct work_struct *work) { struct rt5682s_priv *rt5682s = container_of(work, struct rt5682s_priv, jack_detect_work.work); + struct snd_soc_dapm_context *dapm; int val, btn_type; if (!rt5682s->component || !rt5682s->component->card || @@ -832,7 +833,9 @@ static void rt5682s_jack_detect_handler(struct work_struct *work) return; } - mutex_lock(&rt5682s->jdet_mutex); + dapm = snd_soc_component_get_dapm(rt5682s->component); + + snd_soc_dapm_mutex_lock(dapm); mutex_lock(&rt5682s->calibrate_mutex); val = snd_soc_component_read(rt5682s->component, RT5682S_AJD1_CTRL) @@ -889,6 +892,9 @@ static void rt5682s_jack_detect_handler(struct work_struct *work) rt5682s->irq_work_delay_time = 50; } + mutex_unlock(&rt5682s->calibrate_mutex); + snd_soc_dapm_mutex_unlock(dapm); + snd_soc_jack_report(rt5682s->hs_jack, rt5682s->jack_type, SND_JACK_HEADSET | SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | SND_JACK_BTN_3); @@ -898,9 +904,6 @@ static void rt5682s_jack_detect_handler(struct work_struct *work) schedule_delayed_work(&rt5682s->jd_check_work, 0); else cancel_delayed_work_sync(&rt5682s->jd_check_work); - - mutex_unlock(&rt5682s->calibrate_mutex); - mutex_unlock(&rt5682s->jdet_mutex); } static void rt5682s_jd_check_handler(struct work_struct *work) @@ -908,14 +911,9 @@ static void rt5682s_jd_check_handler(struct work_struct *work) struct rt5682s_priv *rt5682s = container_of(work, struct rt5682s_priv, jd_check_work.work); - if (snd_soc_component_read(rt5682s->component, RT5682S_AJD1_CTRL) - & RT5682S_JDH_RS_MASK) { + if (snd_soc_component_read(rt5682s->component, RT5682S_AJD1_CTRL) & RT5682S_JDH_RS_MASK) { /* jack out */ - rt5682s->jack_type = rt5682s_headset_detect(rt5682s->component, 0); - - snd_soc_jack_report(rt5682s->hs_jack, rt5682s->jack_type, - SND_JACK_HEADSET | SND_JACK_BTN_0 | SND_JACK_BTN_1 | - SND_JACK_BTN_2 | SND_JACK_BTN_3); + schedule_delayed_work(&rt5682s->jack_detect_work, 0); } else { schedule_delayed_work(&rt5682s->jd_check_work, 500); } @@ -1323,7 +1321,6 @@ static int rt5682s_hp_amp_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - struct rt5682s_priv *rt5682s = snd_soc_component_get_drvdata(component); switch (event) { case SND_SOC_DAPM_POST_PMU: @@ -1339,8 +1336,6 @@ static int rt5682s_hp_amp_event(struct snd_soc_dapm_widget *w, snd_soc_component_write(component, RT5682S_BIAS_CUR_CTRL_11, 0x6666); snd_soc_component_write(component, RT5682S_BIAS_CUR_CTRL_12, 0xa82a); - mutex_lock(&rt5682s->jdet_mutex); - snd_soc_component_update_bits(component, RT5682S_HP_CTRL_2, RT5682S_HPO_L_PATH_MASK | RT5682S_HPO_R_PATH_MASK | RT5682S_HPO_SEL_IP_EN_SW, RT5682S_HPO_L_PATH_EN | @@ -1348,8 +1343,6 @@ static int rt5682s_hp_amp_event(struct snd_soc_dapm_widget *w, usleep_range(5000, 10000); snd_soc_component_update_bits(component, RT5682S_HP_AMP_DET_CTL_1, RT5682S_CP_SW_SIZE_MASK, RT5682S_CP_SW_SIZE_L | RT5682S_CP_SW_SIZE_S); - - mutex_unlock(&rt5682s->jdet_mutex); break; case SND_SOC_DAPM_POST_PMD: @@ -3103,7 +3096,6 @@ static int rt5682s_i2c_probe(struct i2c_client *i2c, mutex_init(&rt5682s->calibrate_mutex); mutex_init(&rt5682s->sar_mutex); - mutex_init(&rt5682s->jdet_mutex); rt5682s_calibrate(rt5682s); regmap_update_bits(rt5682s->regmap, RT5682S_MICBIAS_2, diff --git a/sound/soc/codecs/rt5682s.h b/sound/soc/codecs/rt5682s.h index 1bf2ef7ce5784..397a2531b6f68 100644 --- 
a/sound/soc/codecs/rt5682s.h +++ b/sound/soc/codecs/rt5682s.h @@ -1446,7 +1446,6 @@ struct rt5682s_priv { struct delayed_work jd_check_work; struct mutex calibrate_mutex; struct mutex sar_mutex; - struct mutex jdet_mutex; #ifdef CONFIG_COMMON_CLK struct clk_hw dai_clks_hw[RT5682S_DAI_NUM_CLKS]; diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index 6c468527fec61..1e75e93cf28f2 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -3023,14 +3023,14 @@ static int wcd934x_hph_impedance_get(struct snd_kcontrol *kcontrol, return 0; } static const struct snd_kcontrol_new hph_type_detect_controls[] = { - SOC_SINGLE_EXT("HPH Type", 0, 0, UINT_MAX, 0, + SOC_SINGLE_EXT("HPH Type", 0, 0, WCD_MBHC_HPH_STEREO, 0, wcd934x_get_hph_type, NULL), }; static const struct snd_kcontrol_new impedance_detect_controls[] = { - SOC_SINGLE_EXT("HPHL Impedance", 0, 0, UINT_MAX, 0, + SOC_SINGLE_EXT("HPHL Impedance", 0, 0, INT_MAX, 0, wcd934x_hph_impedance_get, NULL), - SOC_SINGLE_EXT("HPHR Impedance", 0, 1, UINT_MAX, 0, + SOC_SINGLE_EXT("HPHR Impedance", 0, 1, INT_MAX, 0, wcd934x_hph_impedance_get, NULL), }; @@ -3308,13 +3308,16 @@ static int wcd934x_rx_hph_mode_put(struct snd_kcontrol *kc, mode_val = ucontrol->value.enumerated.item[0]; + if (mode_val == wcd->hph_mode) + return 0; + if (mode_val == 0) { dev_err(wcd->dev, "Invalid HPH Mode, default to ClSH HiFi\n"); mode_val = CLS_H_LOHIFI; } wcd->hph_mode = mode_val; - return 0; + return 1; } static int slim_rx_mux_get(struct snd_kcontrol *kc, @@ -5883,6 +5886,7 @@ static int wcd934x_codec_parse_data(struct wcd934x_codec *wcd) } wcd->sidev = of_slim_get_device(wcd->sdev->ctrl, ifc_dev_np); + of_node_put(ifc_dev_np); if (!wcd->sidev) { dev_err(dev, "Unable to get SLIM Interface device\n"); return -EINVAL; diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 36cbc66914f90..9ae65cbabb1aa 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -2504,7 +2504,7 @@ static int wcd938x_tx_mode_get(struct snd_kcontrol *kcontrol, struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; int path = e->shift_l; - ucontrol->value.integer.value[0] = wcd938x->tx_mode[path]; + ucontrol->value.enumerated.item[0] = wcd938x->tx_mode[path]; return 0; } @@ -2528,7 +2528,7 @@ static int wcd938x_rx_hph_mode_get(struct snd_kcontrol *kcontrol, struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component); - ucontrol->value.integer.value[0] = wcd938x->hph_mode; + ucontrol->value.enumerated.item[0] = wcd938x->hph_mode; return 0; } @@ -3575,14 +3575,14 @@ static int wcd938x_hph_impedance_get(struct snd_kcontrol *kcontrol, } static const struct snd_kcontrol_new hph_type_detect_controls[] = { - SOC_SINGLE_EXT("HPH Type", 0, 0, UINT_MAX, 0, + SOC_SINGLE_EXT("HPH Type", 0, 0, WCD_MBHC_HPH_STEREO, 0, wcd938x_get_hph_type, NULL), }; static const struct snd_kcontrol_new impedance_detect_controls[] = { - SOC_SINGLE_EXT("HPHL Impedance", 0, 0, UINT_MAX, 0, + SOC_SINGLE_EXT("HPHL Impedance", 0, 0, INT_MAX, 0, wcd938x_hph_impedance_get, NULL), - SOC_SINGLE_EXT("HPHR Impedance", 0, 1, UINT_MAX, 0, + SOC_SINGLE_EXT("HPHR Impedance", 0, 1, INT_MAX, 0, wcd938x_hph_impedance_get, NULL), }; diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index 15d42ce3b21d6..41504ce2a682f 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1537,18 +1537,38 @@ static int wm8350_component_probe(struct 
snd_soc_component *component) wm8350_clear_bits(wm8350, WM8350_JACK_DETECT, WM8350_JDL_ENA | WM8350_JDR_ENA); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, wm8350_hpl_jack_handler, 0, "Left jack detect", priv); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, + if (ret != 0) + goto err; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, wm8350_hpr_jack_handler, 0, "Right jack detect", priv); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICSCD, + if (ret != 0) + goto free_jck_det_l; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICSCD, wm8350_mic_handler, 0, "Microphone short", priv); - wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICD, + if (ret != 0) + goto free_jck_det_r; + + ret = wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICD, wm8350_mic_handler, 0, "Microphone detect", priv); + if (ret != 0) + goto free_micscd; return 0; + +free_micscd: + wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_MICSCD, priv); +free_jck_det_r: + wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, priv); +free_jck_det_l: + wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, priv); +err: + return ret; } static void wm8350_component_remove(struct snd_soc_component *component) diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c index 5cb58929090d4..1edac3e10f345 100644 --- a/sound/soc/dwc/dwc-i2s.c +++ b/sound/soc/dwc/dwc-i2s.c @@ -403,9 +403,13 @@ static int dw_i2s_runtime_suspend(struct device *dev) static int dw_i2s_runtime_resume(struct device *dev) { struct dw_i2s_dev *dw_dev = dev_get_drvdata(dev); + int ret; - if (dw_dev->capability & DW_I2S_MASTER) - clk_enable(dw_dev->clk); + if (dw_dev->capability & DW_I2S_MASTER) { + ret = clk_enable(dw_dev->clk); + if (ret) + return ret; + } return 0; } @@ -422,10 +426,13 @@ static int dw_i2s_resume(struct snd_soc_component *component) { struct dw_i2s_dev *dev = snd_soc_component_get_drvdata(component); struct snd_soc_dai *dai; - int stream; + int stream, ret; - if (dev->capability & DW_I2S_MASTER) - clk_enable(dev->clk); + if (dev->capability & DW_I2S_MASTER) { + ret = clk_enable(dev->clk); + if (ret) + return ret; + } for_each_component_dais(component, dai) { for_each_pcm_streams(stream) diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c index d178b479c8bd4..06d4a014f296d 100644 --- a/sound/soc/fsl/fsl_spdif.c +++ b/sound/soc/fsl/fsl_spdif.c @@ -610,6 +610,8 @@ static void fsl_spdif_shutdown(struct snd_pcm_substream *substream, mask = SCR_TXFIFO_AUTOSYNC_MASK | SCR_TXFIFO_CTRL_MASK | SCR_TXSEL_MASK | SCR_USRC_SEL_MASK | SCR_TXFIFO_FSEL_MASK; + /* Disable TX clock */ + regmap_update_bits(regmap, REG_SPDIF_STC, STC_TXCLK_ALL_EN_MASK, 0); } else { scr = SCR_RXFIFO_OFF | SCR_RXFIFO_CTL_ZERO; mask = SCR_RXFIFO_FSEL_MASK | SCR_RXFIFO_AUTOSYNC_MASK| diff --git a/sound/soc/fsl/imx-es8328.c b/sound/soc/fsl/imx-es8328.c index 09c674ee79f1a..168973035e35f 100644 --- a/sound/soc/fsl/imx-es8328.c +++ b/sound/soc/fsl/imx-es8328.c @@ -87,6 +87,7 @@ static int imx_es8328_probe(struct platform_device *pdev) if (int_port > MUX_PORT_MAX || int_port == 0) { dev_err(dev, "mux-int-port: hardware only has %d mux ports\n", MUX_PORT_MAX); + ret = -EINVAL; goto fail; } diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index a81323d1691d0..9736102e68088 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -275,6 +275,7 @@ int asoc_simple_hw_params(struct snd_pcm_substream 
*substream, mclk_fs = props->mclk_fs; if (mclk_fs) { + struct snd_soc_component *component; mclk = params_rate(params) * mclk_fs; for_each_prop_dai_codec(props, i, pdai) { @@ -282,16 +283,30 @@ int asoc_simple_hw_params(struct snd_pcm_substream *substream, if (ret < 0) return ret; } + for_each_prop_dai_cpu(props, i, pdai) { ret = asoc_simple_set_clk_rate(pdai, mclk); if (ret < 0) return ret; } + + /* Ensure sysclk is set on all components in case any + * (such as platform components) are missed by calls to + * snd_soc_dai_set_sysclk. + */ + for_each_rtd_components(rtd, i, component) { + ret = snd_soc_component_set_sysclk(component, 0, 0, + mclk, SND_SOC_CLOCK_IN); + if (ret && ret != -ENOTSUPP) + return ret; + } + for_each_rtd_codec_dais(rtd, i, sdai) { ret = snd_soc_dai_set_sysclk(sdai, 0, mclk, SND_SOC_CLOCK_IN); if (ret && ret != -ENOTSUPP) return ret; } + for_each_rtd_cpu_dais(rtd, i, sdai) { ret = snd_soc_dai_set_sysclk(sdai, 0, mclk, SND_SOC_CLOCK_OUT); if (ret && ret != -ENOTSUPP) diff --git a/sound/soc/intel/boards/sof_es8336.c b/sound/soc/intel/boards/sof_es8336.c index 20d577eaab6d7..28d7670b8f8f8 100644 --- a/sound/soc/intel/boards/sof_es8336.c +++ b/sound/soc/intel/boards/sof_es8336.c @@ -63,7 +63,12 @@ static const struct acpi_gpio_mapping *gpio_mapping = acpi_es8336_gpios; static void log_quirks(struct device *dev) { - dev_info(dev, "quirk SSP%ld", SOF_ES8336_SSP_CODEC(quirk)); + dev_info(dev, "quirk mask %#lx\n", quirk); + dev_info(dev, "quirk SSP%ld\n", SOF_ES8336_SSP_CODEC(quirk)); + if (quirk & SOF_ES8336_ENABLE_DMIC) + dev_info(dev, "quirk DMIC enabled\n"); + if (quirk & SOF_ES8336_TGL_GPIO_QUIRK) + dev_info(dev, "quirk TGL GPIO enabled\n"); } static int sof_es8316_speaker_power_event(struct snd_soc_dapm_widget *w, diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index da515eb1ddbe7..1f00679b42409 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -185,7 +185,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { .callback = sof_sdw_quirk_cb, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Conv"), }, .driver_data = (void *)(SOF_SDW_TGL_HDMI | SOF_SDW_PCH_DMIC | diff --git a/sound/soc/intel/common/soc-acpi-intel-bxt-match.c b/sound/soc/intel/common/soc-acpi-intel-bxt-match.c index 342d340522045..04a92e74d99bc 100644 --- a/sound/soc/intel/common/soc-acpi-intel-bxt-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-bxt-match.c @@ -41,6 +41,11 @@ static struct snd_soc_acpi_mach *apl_quirk(void *arg) return mach; } +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + static const struct snd_soc_acpi_codecs bxt_codecs = { .num_codecs = 1, .codecs = {"MX98357A"} @@ -83,7 +88,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_bxt_machines[] = { .sof_tplg_filename = "sof-apl-tdf8532.tplg", }, { - .id = "ESSX8336", + .comp_ids = &essx_83x6, .drv_name = "sof-essx8336", .sof_fw_filename = "sof-apl.ri", .sof_tplg_filename = "sof-apl-es8336.tplg", diff --git a/sound/soc/intel/common/soc-acpi-intel-cml-match.c b/sound/soc/intel/common/soc-acpi-intel-cml-match.c index 4eebc79d4b486..14395833d89e8 100644 --- a/sound/soc/intel/common/soc-acpi-intel-cml-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-cml-match.c @@ -9,6 +9,11 @@ #include #include +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + 
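The simple-card-utils hunk above now forwards the computed mclk to every component on the runtime, not only to the codec and CPU DAIs, so platform components (such as the Xilinx formatter changed later in this series) also learn the rate. A minimal sketch of the same propagation order in a hypothetical machine driver — `my_card_hw_params` and the fixed 256*fs factor are assumptions, and -ENOTSUPP is ignored per the usual ASoC convention::

    #include <sound/soc.h>

    static int my_card_hw_params(struct snd_pcm_substream *substream,
                                 struct snd_pcm_hw_params *params)
    {
            struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
            struct snd_soc_component *component;
            struct snd_soc_dai *dai;
            unsigned int mclk = params_rate(params) * 256; /* assumed 256*fs */
            int i, ret;

            /* components first, so platform drivers see the rate too */
            for_each_rtd_components(rtd, i, component) {
                    ret = snd_soc_component_set_sysclk(component, 0, 0, mclk,
                                                       SND_SOC_CLOCK_IN);
                    if (ret && ret != -ENOTSUPP)
                            return ret;
            }

            for_each_rtd_codec_dais(rtd, i, dai) {
                    ret = snd_soc_dai_set_sysclk(dai, 0, mclk, SND_SOC_CLOCK_IN);
                    if (ret && ret != -ENOTSUPP)
                            return ret;
            }

            for_each_rtd_cpu_dais(rtd, i, dai) {
                    ret = snd_soc_dai_set_sysclk(dai, 0, mclk, SND_SOC_CLOCK_OUT);
                    if (ret && ret != -ENOTSUPP)
                            return ret;
            }

            return 0;
    }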
.codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + static const struct snd_soc_acpi_codecs rt1011_spk_codecs = { .num_codecs = 1, .codecs = {"10EC1011"} @@ -82,7 +87,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_cml_machines[] = { .sof_tplg_filename = "sof-cml-da7219-max98390.tplg", }, { - .id = "ESSX8336", + .comp_ids = &essx_83x6, .drv_name = "sof-essx8336", .sof_fw_filename = "sof-cml.ri", .sof_tplg_filename = "sof-cml-es8336.tplg", diff --git a/sound/soc/intel/common/soc-acpi-intel-glk-match.c b/sound/soc/intel/common/soc-acpi-intel-glk-match.c index 8492b7e2a9450..7aa6a870d5a5c 100644 --- a/sound/soc/intel/common/soc-acpi-intel-glk-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-glk-match.c @@ -9,6 +9,11 @@ #include #include +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + static const struct snd_soc_acpi_codecs glk_codecs = { .num_codecs = 1, .codecs = {"MX98357A"} @@ -58,7 +63,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_glk_machines[] = { .sof_tplg_filename = "sof-glk-cs42l42.tplg", }, { - .id = "ESSX8336", + .comp_ids = &essx_83x6, .drv_name = "sof-essx8336", .sof_fw_filename = "sof-glk.ri", .sof_tplg_filename = "sof-glk-es8336.tplg", diff --git a/sound/soc/intel/common/soc-acpi-intel-jsl-match.c b/sound/soc/intel/common/soc-acpi-intel-jsl-match.c index 278ec196da7bf..9d0d0e1437a4b 100644 --- a/sound/soc/intel/common/soc-acpi-intel-jsl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-jsl-match.c @@ -9,6 +9,11 @@ #include #include +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + static const struct snd_soc_acpi_codecs jsl_7219_98373_codecs = { .num_codecs = 1, .codecs = {"MX98373"} @@ -87,7 +92,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_jsl_machines[] = { .sof_tplg_filename = "sof-jsl-cs42l42-mx98360a.tplg", }, { - .id = "ESSX8336", + .comp_ids = &essx_83x6, .drv_name = "sof-essx8336", .sof_fw_filename = "sof-jsl.ri", .sof_tplg_filename = "sof-jsl-es8336.tplg", diff --git a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c index da31bb3cca17c..e2658bca69318 100644 --- a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c @@ -10,6 +10,11 @@ #include #include "soc-acpi-intel-sdw-mockup-match.h" +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + static const struct snd_soc_acpi_codecs tgl_codecs = { .num_codecs = 1, .codecs = {"MX98357A"} @@ -389,7 +394,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_tgl_machines[] = { .sof_tplg_filename = "sof-tgl-rt1011-rt5682.tplg", }, { - .id = "ESSX8336", + .comp_ids = &essx_83x6, .drv_name = "sof-essx8336", .sof_fw_filename = "sof-tgl.ri", .sof_tplg_filename = "sof-tgl-es8336.tplg", diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c index 718505c754188..f090dee0c7a4f 100644 --- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c @@ -695,8 +695,11 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) } card = (struct snd_soc_card *)of_device_get_match_data(&pdev->dev); - if (!card) - return -EINVAL; + if (!card) { + ret = -EINVAL; + goto put_platform_node; + } + card->dev = &pdev->dev; hdmi_codec = of_parse_phandle(pdev->dev.of_node, 
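Each of the Intel soc-acpi hunks in this series replaces a single-HID `.id` match with a `.comp_ids` table, so one machine entry covers the whole ES8316/ES8326/ES8336 family. The same pattern, sketched with hypothetical ACPI HIDs, driver name, and topology filename::

    #include <sound/soc-acpi.h>

    static const struct snd_soc_acpi_codecs my_codecs = {
            .num_codecs = 2,
            .codecs = { "ABCD1234", "ABCD5678" },   /* hypothetical HIDs */
    };

    struct snd_soc_acpi_mach my_machines[] = {
            {
                    /* one entry now matches any HID listed above */
                    .comp_ids = &my_codecs,
                    .drv_name = "my-machine",
                    .sof_tplg_filename = "my-topology.tplg",
            },
            {},
    };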
@@ -761,12 +764,15 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) if (!mt8183_da7219_max98357_headset_dev.dlc.of_node) { dev_err(&pdev->dev, "Property 'mediatek,headset-codec' missing/invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_hdmi_codec; } priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + if (!priv) { + ret = -ENOMEM; + goto put_hdmi_codec; + } snd_soc_card_set_drvdata(card, priv); @@ -775,13 +781,16 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) ret = PTR_ERR(pinctrl); dev_err(&pdev->dev, "%s failed to select default state %d\n", __func__, ret); - return ret; + goto put_hdmi_codec; } ret = devm_snd_soc_register_card(&pdev->dev, card); - of_node_put(platform_node); + +put_hdmi_codec: of_node_put(hdmi_codec); +put_platform_node: + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c index f7daad1bfe1ed..ee91569c09117 100644 --- a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c +++ b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c @@ -1116,8 +1116,10 @@ static int mt8192_mt6359_dev_probe(struct platform_device *pdev) } card = (struct snd_soc_card *)of_device_get_match_data(&pdev->dev); - if (!card) - return -EINVAL; + if (!card) { + ret = -EINVAL; + goto put_platform_node; + } card->dev = &pdev->dev; hdmi_codec = of_parse_phandle(pdev->dev.of_node, @@ -1159,20 +1161,24 @@ static int mt8192_mt6359_dev_probe(struct platform_device *pdev) } priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + if (!priv) { + ret = -ENOMEM; + goto put_hdmi_codec; + } snd_soc_card_set_drvdata(card, priv); ret = mt8192_afe_gpio_init(&pdev->dev); if (ret) { dev_err(&pdev->dev, "init gpio error %d\n", ret); - return ret; + goto put_hdmi_codec; } ret = devm_snd_soc_register_card(&pdev->dev, card); - of_node_put(platform_node); +put_hdmi_codec: of_node_put(hdmi_codec); +put_platform_node: + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c b/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c index 29c2d3407cc7c..e3146311722f8 100644 --- a/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c +++ b/sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c @@ -1342,7 +1342,8 @@ static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev) "mediatek,dai-link"); if (ret) { dev_dbg(&pdev->dev, "Parse dai-link fail\n"); - return -EINVAL; + ret = -EINVAL; + goto put_node; } } else { if (!sof_on) @@ -1398,6 +1399,7 @@ static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(&pdev->dev, card); +put_node: of_node_put(platform_node); of_node_put(adsp_node); of_node_put(dp_node); diff --git a/sound/soc/mxs/mxs-saif.c b/sound/soc/mxs/mxs-saif.c index 6a2d24d489647..879c1221a809b 100644 --- a/sound/soc/mxs/mxs-saif.c +++ b/sound/soc/mxs/mxs-saif.c @@ -455,7 +455,10 @@ static int mxs_saif_hw_params(struct snd_pcm_substream *substream, * basic clock which should be fast enough for the internal * logic. 
*/ - clk_enable(saif->clk); + ret = clk_enable(saif->clk); + if (ret) + return ret; + ret = clk_set_rate(saif->clk, 24000000); clk_disable(saif->clk); if (ret) diff --git a/sound/soc/mxs/mxs-sgtl5000.c b/sound/soc/mxs/mxs-sgtl5000.c index 2412dc7e65d44..746f409386751 100644 --- a/sound/soc/mxs/mxs-sgtl5000.c +++ b/sound/soc/mxs/mxs-sgtl5000.c @@ -118,6 +118,9 @@ static int mxs_sgtl5000_probe(struct platform_device *pdev) codec_np = of_parse_phandle(np, "audio-codec", 0); if (!saif_np[0] || !saif_np[1] || !codec_np) { dev_err(&pdev->dev, "phandle missing or invalid\n"); + of_node_put(codec_np); + of_node_put(saif_np[0]); + of_node_put(saif_np[1]); return -EINVAL; } diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index a6d7656c206e5..4ce5d25793875 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -716,19 +716,23 @@ static int rockchip_i2s_probe(struct platform_device *pdev) i2s->mclk = devm_clk_get(&pdev->dev, "i2s_clk"); if (IS_ERR(i2s->mclk)) { dev_err(&pdev->dev, "Can't retrieve i2s master clock\n"); - return PTR_ERR(i2s->mclk); + ret = PTR_ERR(i2s->mclk); + goto err_clk; } regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res); - if (IS_ERR(regs)) - return PTR_ERR(regs); + if (IS_ERR(regs)) { + ret = PTR_ERR(regs); + goto err_clk; + } i2s->regmap = devm_regmap_init_mmio(&pdev->dev, regs, &rockchip_i2s_regmap_config); if (IS_ERR(i2s->regmap)) { dev_err(&pdev->dev, "Failed to initialise managed register map\n"); - return PTR_ERR(i2s->regmap); + ret = PTR_ERR(i2s->regmap); + goto err_clk; } i2s->bclk_ratio = 64; @@ -768,7 +772,8 @@ static int rockchip_i2s_probe(struct platform_device *pdev) i2s_runtime_suspend(&pdev->dev); err_pm_disable: pm_runtime_disable(&pdev->dev); - +err_clk: + clk_disable_unprepare(i2s->hclk); return ret; } diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index 5f9cb5c4c7f09..98700e75b82a1 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -469,14 +469,14 @@ static int rockchip_i2s_tdm_set_fmt(struct snd_soc_dai *cpu_dai, txcr_val = I2S_TXCR_IBM_NORMAL; rxcr_val = I2S_RXCR_IBM_NORMAL; break; - case SND_SOC_DAIFMT_DSP_A: /* PCM no delay mode */ - txcr_val = I2S_TXCR_TFS_PCM; - rxcr_val = I2S_RXCR_TFS_PCM; - break; - case SND_SOC_DAIFMT_DSP_B: /* PCM delay 1 mode */ + case SND_SOC_DAIFMT_DSP_A: /* PCM delay 1 mode */ txcr_val = I2S_TXCR_TFS_PCM | I2S_TXCR_PBM_MODE(1); rxcr_val = I2S_RXCR_TFS_PCM | I2S_RXCR_PBM_MODE(1); break; + case SND_SOC_DAIFMT_DSP_B: /* PCM no delay mode */ + txcr_val = I2S_TXCR_TFS_PCM; + rxcr_val = I2S_RXCR_TFS_PCM; + break; default: ret = -EINVAL; goto err_pm_put; @@ -1738,7 +1738,7 @@ static int __maybe_unused rockchip_i2s_tdm_resume(struct device *dev) struct rk_i2s_tdm_dev *i2s_tdm = dev_get_drvdata(dev); int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; ret = regcache_sync(i2s_tdm->regmap); diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c index cdf3b7f69ba70..e9a1eb6bdf66a 100644 --- a/sound/soc/sh/fsi.c +++ b/sound/soc/sh/fsi.c @@ -816,14 +816,27 @@ static int fsi_clk_enable(struct device *dev, return ret; } - clk_enable(clock->xck); - clk_enable(clock->ick); - clk_enable(clock->div); + ret = clk_enable(clock->xck); + if (ret) + goto err; + ret = clk_enable(clock->ick); + if (ret) + goto disable_xck; + ret = clk_enable(clock->div); + if (ret) + goto disable_ick; clock->count++; } return ret; + +disable_ick: + 
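A recurring fix across this series (msm8916-wcd, rk817, wm8350, rockchip-i2s, davinci-i2s, and the fsi clock ladder above) is that a probe path which has already enabled a clock must disable it on every later failure instead of returning directly. A minimal sketch of the goto-unwind idiom — `my_probe`, `my_priv`, and the clock/IRQ names are hypothetical::

    #include <linux/clk.h>
    #include <linux/err.h>
    #include <linux/interrupt.h>
    #include <linux/platform_device.h>
    #include <linux/slab.h>

    struct my_priv {
            struct clk *mclk;
    };

    static irqreturn_t my_handler(int irq, void *data)
    {
            return IRQ_HANDLED;
    }

    static int my_probe(struct platform_device *pdev)
    {
            struct my_priv *priv;
            int irq, ret;

            priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
            if (!priv)
                    return -ENOMEM;

            priv->mclk = devm_clk_get(&pdev->dev, "mclk");
            if (IS_ERR(priv->mclk))
                    return PTR_ERR(priv->mclk);

            ret = clk_prepare_enable(priv->mclk);
            if (ret)
                    return ret;

            irq = platform_get_irq_byname(pdev, "my_irq");
            if (irq < 0) {
                    /* a bare "return irq" here would leak the enabled clock */
                    ret = irq;
                    goto err_disable_clk;
            }

            ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, my_handler,
                                            IRQF_ONESHOT, "my-irq", priv);
            if (ret)
                    goto err_disable_clk;

            return 0;

    err_disable_clk:
            clk_disable_unprepare(priv->mclk);
            return ret;
    }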
clk_disable(clock->ick); +disable_xck: + clk_disable(clock->xck); +err: + return ret; } static int fsi_clk_disable(struct device *dev, diff --git a/sound/soc/sh/rz-ssi.c b/sound/soc/sh/rz-ssi.c index e8d98b362f9db..7379b1489e358 100644 --- a/sound/soc/sh/rz-ssi.c +++ b/sound/soc/sh/rz-ssi.c @@ -411,54 +411,56 @@ static int rz_ssi_pio_recv(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm) { struct snd_pcm_substream *substream = strm->substream; struct snd_pcm_runtime *runtime; + bool done = false; u16 *buf; int fifo_samples; int frames_left; - int samples = 0; + int samples; int i; if (!rz_ssi_stream_is_valid(ssi, strm)) return -EINVAL; runtime = substream->runtime; - /* frames left in this period */ - frames_left = runtime->period_size - (strm->buffer_pos % - runtime->period_size); - if (frames_left == 0) - frames_left = runtime->period_size; - /* Samples in RX FIFO */ - fifo_samples = (rz_ssi_reg_readl(ssi, SSIFSR) >> - SSIFSR_RDC_SHIFT) & SSIFSR_RDC_MASK; + while (!done) { + /* frames left in this period */ + frames_left = runtime->period_size - + (strm->buffer_pos % runtime->period_size); + if (!frames_left) + frames_left = runtime->period_size; + + /* Samples in RX FIFO */ + fifo_samples = (rz_ssi_reg_readl(ssi, SSIFSR) >> + SSIFSR_RDC_SHIFT) & SSIFSR_RDC_MASK; + + /* Only read full frames at a time */ + samples = 0; + while (frames_left && (fifo_samples >= runtime->channels)) { + samples += runtime->channels; + fifo_samples -= runtime->channels; + frames_left--; + } - /* Only read full frames at a time */ - while (frames_left && (fifo_samples >= runtime->channels)) { - samples += runtime->channels; - fifo_samples -= runtime->channels; - frames_left--; - } + /* not enough samples yet */ + if (!samples) + break; - /* not enough samples yet */ - if (samples == 0) - return 0; + /* calculate new buffer index */ + buf = (u16 *)(runtime->dma_area); + buf += strm->buffer_pos * runtime->channels; - /* calculate new buffer index */ - buf = (u16 *)(runtime->dma_area); - buf += strm->buffer_pos * runtime->channels; - - /* Note, only supports 16-bit samples */ - for (i = 0; i < samples; i++) - *buf++ = (u16)(rz_ssi_reg_readl(ssi, SSIFRDR) >> 16); + /* Note, only supports 16-bit samples */ + for (i = 0; i < samples; i++) + *buf++ = (u16)(rz_ssi_reg_readl(ssi, SSIFRDR) >> 16); - rz_ssi_reg_mask_setl(ssi, SSIFSR, SSIFSR_RDF, 0); - rz_ssi_pointer_update(strm, samples / runtime->channels); + rz_ssi_reg_mask_setl(ssi, SSIFSR, SSIFSR_RDF, 0); + rz_ssi_pointer_update(strm, samples / runtime->channels); - /* - * If we finished this period, but there are more samples in - * the RX FIFO, call this function again - */ - if (frames_left == 0 && fifo_samples >= runtime->channels) - rz_ssi_pio_recv(ssi, strm); + /* check if there are no more samples in the RX FIFO */ + if (!(!frames_left && fifo_samples >= runtime->channels)) + done = true; + } return 0; } @@ -975,6 +977,9 @@ static int rz_ssi_probe(struct platform_device *pdev) ssi->playback.priv = ssi; ssi->capture.priv = ssi; + spin_lock_init(&ssi->lock); + dev_set_drvdata(&pdev->dev, ssi); + /* Error Interrupt */ ssi->irq_int = platform_get_irq_byname(pdev, "int_req"); if (ssi->irq_int < 0) @@ -1027,8 +1032,6 @@ static int rz_ssi_probe(struct platform_device *pdev) return dev_err_probe(ssi->dev, ret, "pm_runtime_resume_and_get failed\n"); } - spin_lock_init(&ssi->lock); - dev_set_drvdata(&pdev->dev, ssi); ret = devm_snd_soc_register_component(&pdev->dev, &rz_ssi_soc_component, rz_ssi_soc_dai, ARRAY_SIZE(rz_ssi_soc_dai)); diff --git 
a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index 8e2494a9f3a7f..e9dd25894dc0f 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -567,6 +567,11 @@ int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) return -EINVAL; } + if (!codec_dai) { + dev_err(rtd->card->dev, "Missing codec\n"); + return -EINVAL; + } + /* check client and interface hw capabilities */ if (snd_soc_dai_stream_valid(codec_dai, SNDRV_PCM_STREAM_PLAYBACK) && snd_soc_dai_stream_valid(cpu_dai, SNDRV_PCM_STREAM_PLAYBACK)) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 434e61b46983c..a088bc9f7dd7c 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3233,7 +3233,7 @@ int snd_soc_get_dai_name(const struct of_phandle_args *args, for_each_component(pos) { struct device_node *component_of_node = soc_component_to_node(pos); - if (component_of_node != args->np) + if (component_of_node != args->np || !pos->num_dai) continue; ret = snd_soc_component_of_xlate_dai_name(pos, args, dai_name); diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index c54c8ca8d7156..359987bf76d1b 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -86,10 +86,10 @@ static int dmaengine_pcm_hw_params(struct snd_soc_component *component, memset(&slave_config, 0, sizeof(slave_config)); - if (!pcm->config) - prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config; - else + if (pcm->config && pcm->config->prepare_slave_config) prepare_slave_config = pcm->config->prepare_slave_config; + else + prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config; if (prepare_slave_config) { int ret = prepare_slave_config(substream, params, &slave_config); diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 2630df024dff3..cb24805668bd8 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -512,7 +512,8 @@ static int soc_tplg_kcontrol_bind_io(struct snd_soc_tplg_ctl_hdr *hdr, if (le32_to_cpu(hdr->ops.info) == SND_SOC_TPLG_CTL_BYTES && k->iface & SNDRV_CTL_ELEM_IFACE_MIXER - && k->access & SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE + && (k->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ + || k->access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) && k->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { struct soc_bytes_ext *sbe; struct snd_soc_tplg_bytes_control *be; diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c index 6d6757075f7c3..e755c0c5f86c0 100644 --- a/sound/soc/sof/debug.c +++ b/sound/soc/sof/debug.c @@ -960,7 +960,7 @@ static void snd_sof_dbg_print_fw_state(struct snd_sof_dev *sdev, const char *lev void snd_sof_dsp_dbg_dump(struct snd_sof_dev *sdev, const char *msg, u32 flags) { - char *level = flags & SOF_DBG_DUMP_OPTIONAL ? KERN_DEBUG : KERN_ERR; + char *level = (flags & SOF_DBG_DUMP_OPTIONAL) ? 
KERN_DEBUG : KERN_ERR; bool print_all = sof_debug_check_flag(SOF_DBG_PRINT_ALL_DUMPS); if (flags & SOF_DBG_DUMP_OPTIONAL && !print_all) diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c index 788e77bcb6038..60251486b24b2 100644 --- a/sound/soc/sof/imx/imx8m.c +++ b/sound/soc/sof/imx/imx8m.c @@ -224,6 +224,7 @@ static int imx8m_probe(struct snd_sof_dev *sdev) } ret = of_address_to_resource(res_node, 0, &res); + of_node_put(res_node); if (ret) { dev_err(&pdev->dev, "failed to get reserved region address\n"); goto exit_pdev_unregister; diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index 88b6176af021c..d83e1a36707af 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -84,6 +84,7 @@ if SND_SOC_SOF_PCI config SND_SOC_SOF_MERRIFIELD tristate "SOF support for Tangier/Merrifield" default SND_SOC_SOF_PCI + select SND_SOC_SOF_PCI_DEV select SND_SOC_SOF_INTEL_ATOM_HIFI_EP help This adds support for Sound Open Firmware for Intel(R) platforms diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c index cd12589355eff..28a54145c1506 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -59,6 +59,8 @@ static struct hdac_ext_stream * { struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); struct sof_intel_hda_stream *hda_stream; + const struct sof_intel_dsp_desc *chip; + struct snd_sof_dev *sdev; struct hdac_ext_stream *res = NULL; struct hdac_stream *stream = NULL; @@ -77,9 +79,20 @@ static struct hdac_ext_stream * continue; hda_stream = hstream_to_sof_hda_stream(hstream); + sdev = hda_stream->sdev; + chip = get_chip_info(sdev->pdata); /* check if link is available */ if (!hstream->link_locked) { + /* + * choose the first available link for platforms that do not have the + * PROCEN_FMT_QUIRK set. 
+ */ + if (!(chip->quirks & SOF_INTEL_PROCEN_FMT_QUIRK)) { + res = hstream; + break; + } + if (stream->opened) { /* * check if the stream tag matches the stream diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index 33306d2023a78..9bbfdab8009de 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -47,7 +47,7 @@ static struct hdac_ext_stream *cl_stream_prepare(struct snd_sof_dev *sdev, unsig ret = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV_SG, &pci->dev, size, dmab); if (ret < 0) { dev_err(sdev->dev, "error: memory alloc failed: %d\n", ret); - goto error; + goto out_put; } hstream->period_bytes = 0;/* initialize period_bytes */ @@ -58,22 +58,23 @@ static struct hdac_ext_stream *cl_stream_prepare(struct snd_sof_dev *sdev, unsig ret = hda_dsp_iccmax_stream_hw_params(sdev, dsp_stream, dmab, NULL); if (ret < 0) { dev_err(sdev->dev, "error: iccmax stream prepare failed: %d\n", ret); - goto error; + goto out_free; } } else { ret = hda_dsp_stream_hw_params(sdev, dsp_stream, dmab, NULL); if (ret < 0) { dev_err(sdev->dev, "error: hdac prepare failed: %d\n", ret); - goto error; + goto out_free; } hda_dsp_stream_spib_config(sdev, dsp_stream, HDA_DSP_SPIB_ENABLE, size); } return dsp_stream; -error: - hda_dsp_stream_put(sdev, direction, hstream->stream_tag); +out_free: snd_dma_free_pages(dmab); +out_put: + hda_dsp_stream_put(sdev, direction, hstream->stream_tag); return ERR_PTR(ret); } diff --git a/sound/soc/sof/intel/hda-pcm.c b/sound/soc/sof/intel/hda-pcm.c index d78aa5d8552d5..8aeb00eacd219 100644 --- a/sound/soc/sof/intel/hda-pcm.c +++ b/sound/soc/sof/intel/hda-pcm.c @@ -315,6 +315,7 @@ int hda_dsp_pcm_open(struct snd_sof_dev *sdev, runtime->hw.info &= ~SNDRV_PCM_INFO_PAUSE; if (hda_always_enable_dmi_l1 || + direction == SNDRV_PCM_STREAM_PLAYBACK || spcm->stream[substream->stream].d0i3_compatible) flags |= SOF_HDA_STREAM_DMI_L1_COMPATIBLE; diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 1385695d77458..028751549f6da 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -432,11 +432,9 @@ static char *hda_model; module_param(hda_model, charp, 0444); MODULE_PARM_DESC(hda_model, "Use the given HDA board model."); -#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) || IS_ENABLED(CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE) -static int hda_dmic_num = -1; -module_param_named(dmic_num, hda_dmic_num, int, 0444); +static int dmic_num_override = -1; +module_param_named(dmic_num, dmic_num_override, int, 0444); MODULE_PARM_DESC(dmic_num, "SOF HDA DMIC number"); -#endif #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) static bool hda_codec_use_common_hdmi = IS_ENABLED(CONFIG_SND_HDA_CODEC_HDMI); @@ -644,24 +642,35 @@ static int hda_init(struct snd_sof_dev *sdev) return ret; } -#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) || IS_ENABLED(CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE) - -static int check_nhlt_dmic(struct snd_sof_dev *sdev) +static int check_dmic_num(struct snd_sof_dev *sdev) { struct nhlt_acpi_table *nhlt; - int dmic_num; + int dmic_num = 0; nhlt = intel_nhlt_init(sdev->dev); if (nhlt) { dmic_num = intel_nhlt_get_dmic_geo(sdev->dev, nhlt); intel_nhlt_free(nhlt); - if (dmic_num >= 1 && dmic_num <= 4) - return dmic_num; } - return 0; + /* allow for module parameter override */ + if (dmic_num_override != -1) { + dev_dbg(sdev->dev, + "overriding DMICs detected in NHLT tables %d by kernel param %d\n", + dmic_num, dmic_num_override); + dmic_num = dmic_num_override; + } + + if (dmic_num < 0 || dmic_num > 4) { + dev_dbg(sdev->dev, "invalid 
dmic_number %d\n", dmic_num); + dmic_num = 0; + } + + return dmic_num; } +#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) || IS_ENABLED(CONFIG_SND_SOC_SOF_INTEL_SOUNDWIRE) + static const char *fixup_tplg_name(struct snd_sof_dev *sdev, const char *sof_tplg_filename, const char *idisp_str, @@ -697,16 +706,8 @@ static int dmic_topology_fixup(struct snd_sof_dev *sdev, const char *dmic_str; int dmic_num; - /* first check NHLT for DMICs */ - dmic_num = check_nhlt_dmic(sdev); - - /* allow for module parameter override */ - if (hda_dmic_num != -1) { - dev_dbg(sdev->dev, - "overriding DMICs detected in NHLT tables %d by kernel param %d\n", - dmic_num, hda_dmic_num); - dmic_num = hda_dmic_num; - } + /* first check for DMICs (using NHLT or module parameter) */ + dmic_num = check_dmic_num(sdev); switch (dmic_num) { case 1: @@ -1188,7 +1189,7 @@ static bool link_slaves_found(struct snd_sof_dev *sdev, struct hdac_bus *bus = sof_to_bus(sdev); struct sdw_intel_slave_id *ids = sdw->ids; int num_slaves = sdw->num_slaves; - unsigned int part_id, link_id, unique_id, mfg_id; + unsigned int part_id, link_id, unique_id, mfg_id, version; int i, j, k; for (i = 0; i < link->num_adr; i++) { @@ -1198,12 +1199,14 @@ static bool link_slaves_found(struct snd_sof_dev *sdev, mfg_id = SDW_MFG_ID(adr); part_id = SDW_PART_ID(adr); link_id = SDW_DISCO_LINK_ID(adr); + version = SDW_VERSION(adr); for (j = 0; j < num_slaves; j++) { /* find out how many identical parts were reported on that link */ if (ids[j].link_id == link_id && ids[j].id.part_id == part_id && - ids[j].id.mfg_id == mfg_id) + ids[j].id.mfg_id == mfg_id && + ids[j].id.sdw_version == version) reported_part_count++; } @@ -1212,21 +1215,24 @@ static bool link_slaves_found(struct snd_sof_dev *sdev, if (ids[j].link_id != link_id || ids[j].id.part_id != part_id || - ids[j].id.mfg_id != mfg_id) + ids[j].id.mfg_id != mfg_id || + ids[j].id.sdw_version != version) continue; /* find out how many identical parts are expected */ for (k = 0; k < link->num_adr; k++) { u64 adr2 = link->adr_d[k].adr; - unsigned int part_id2, link_id2, mfg_id2; + unsigned int part_id2, link_id2, mfg_id2, version2; mfg_id2 = SDW_MFG_ID(adr2); part_id2 = SDW_PART_ID(adr2); link_id2 = SDW_DISCO_LINK_ID(adr2); + version2 = SDW_VERSION(adr2); if (link_id2 == link_id && part_id2 == part_id && - mfg_id2 == mfg_id) + mfg_id2 == mfg_id && + version2 == version) expected_part_count++; } @@ -1387,6 +1393,9 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) if (!sof_pdata->tplg_filename) sof_pdata->tplg_filename = mach->sof_tplg_filename; + /* report to machine driver if any DMICs are found */ + mach->mach_params.dmic_num = check_dmic_num(sdev); + if (mach->link_mask) { mach->mach_params.links = mach->links; mach->mach_params.link_mask = mach->link_mask; diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index 2ed92c990b97c..dd9013c476649 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -91,7 +91,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) SET_UNIPERIF_ITM_BCLR_FIFO_ERROR(player); /* Stop the player */ - snd_pcm_stop_xrun(player->substream); + snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); } ret = IRQ_HANDLED; @@ -105,7 +105,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) SET_UNIPERIF_ITM_BCLR_DMA_ERROR(player); /* Stop the player */ - snd_pcm_stop_xrun(player->substream); + snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); ret = IRQ_HANDLED; } @@ -138,7 +138,7 @@ static 
irqreturn_t uni_player_irq_handler(int irq, void *dev_id) dev_err(player->dev, "Underflow recovery failed\n"); /* Stop the player */ - snd_pcm_stop_xrun(player->substream); + snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); ret = IRQ_HANDLED; } diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 136059331211d..065c5f0d1f5f0 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -65,7 +65,7 @@ static irqreturn_t uni_reader_irq_handler(int irq, void *dev_id) if (unlikely(status & UNIPERIF_ITS_FIFO_ERROR_MASK(reader))) { dev_err(reader->dev, "FIFO error detected\n"); - snd_pcm_stop_xrun(reader->substream); + snd_pcm_stop(reader->substream, SNDRV_PCM_STATE_XRUN); ret = IRQ_HANDLED; } diff --git a/sound/soc/ti/davinci-i2s.c b/sound/soc/ti/davinci-i2s.c index 6dca51862dd76..0363a088d2e00 100644 --- a/sound/soc/ti/davinci-i2s.c +++ b/sound/soc/ti/davinci-i2s.c @@ -708,7 +708,9 @@ static int davinci_i2s_probe(struct platform_device *pdev) dev->clk = clk_get(&pdev->dev, NULL); if (IS_ERR(dev->clk)) return -ENODEV; - clk_enable(dev->clk); + ret = clk_enable(dev->clk); + if (ret) + goto err_put_clk; dev->dev = &pdev->dev; dev_set_drvdata(&pdev->dev, dev); @@ -730,6 +732,7 @@ static int davinci_i2s_probe(struct platform_device *pdev) snd_soc_unregister_component(&pdev->dev); err_release_clk: clk_disable(dev->clk); +err_put_clk: clk_put(dev->clk); return ret; } diff --git a/sound/soc/xilinx/xlnx_formatter_pcm.c b/sound/soc/xilinx/xlnx_formatter_pcm.c index ce19a6058b279..5c4158069a5a8 100644 --- a/sound/soc/xilinx/xlnx_formatter_pcm.c +++ b/sound/soc/xilinx/xlnx_formatter_pcm.c @@ -84,6 +84,7 @@ struct xlnx_pcm_drv_data { struct snd_pcm_substream *play_stream; struct snd_pcm_substream *capture_stream; struct clk *axi_clk; + unsigned int sysclk; }; /* @@ -314,6 +315,15 @@ static irqreturn_t xlnx_s2mm_irq_handler(int irq, void *arg) return IRQ_NONE; } +static int xlnx_formatter_set_sysclk(struct snd_soc_component *component, + int clk_id, int source, unsigned int freq, int dir) +{ + struct xlnx_pcm_drv_data *adata = dev_get_drvdata(component->dev); + + adata->sysclk = freq; + return 0; +} + static int xlnx_formatter_pcm_open(struct snd_soc_component *component, struct snd_pcm_substream *substream) { @@ -450,11 +460,25 @@ static int xlnx_formatter_pcm_hw_params(struct snd_soc_component *component, u64 size; struct snd_pcm_runtime *runtime = substream->runtime; struct xlnx_pcm_stream_param *stream_data = runtime->private_data; + struct xlnx_pcm_drv_data *adata = dev_get_drvdata(component->dev); active_ch = params_channels(params); if (active_ch > stream_data->ch_limit) return -EINVAL; + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && + adata->sysclk) { + unsigned int mclk_fs = adata->sysclk / params_rate(params); + + if (adata->sysclk % params_rate(params) != 0) { + dev_warn(component->dev, "sysclk %u not divisible by rate %u\n", + adata->sysclk, params_rate(params)); + return -EINVAL; + } + + writel(mclk_fs, stream_data->mmio + XLNX_AUD_FS_MULTIPLIER); + } + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE && stream_data->xfer_mode == AES_TO_PCM) { val = readl(stream_data->mmio + XLNX_AUD_STS); @@ -552,6 +576,7 @@ static int xlnx_formatter_pcm_new(struct snd_soc_component *component, static const struct snd_soc_component_driver xlnx_asoc_component = { .name = DRV_NAME, + .set_sysclk = xlnx_formatter_set_sysclk, .open = xlnx_formatter_pcm_open, .close = xlnx_formatter_pcm_close, .hw_params = xlnx_formatter_pcm_hw_params, diff 
--git a/sound/spi/at73c213.c b/sound/spi/at73c213.c index 76c0e37a838cf..8a2da6b1012eb 100644 --- a/sound/spi/at73c213.c +++ b/sound/spi/at73c213.c @@ -218,7 +218,9 @@ static int snd_at73c213_pcm_open(struct snd_pcm_substream *substream) runtime->hw = snd_at73c213_playback_hw; chip->substream = substream; - clk_enable(chip->ssc->clk); + err = clk_enable(chip->ssc->clk); + if (err) + return err; return 0; } @@ -776,7 +778,9 @@ static int snd_at73c213_chip_init(struct snd_at73c213 *chip) goto out; /* Enable DAC master clock. */ - clk_enable(chip->board->dac_clk); + retval = clk_enable(chip->board->dac_clk); + if (retval) + goto out; /* Initialize at73c213 on SPI bus. */ retval = snd_at73c213_write_reg(chip, DAC_RST, 0x04); @@ -889,7 +893,9 @@ static int snd_at73c213_dev_init(struct snd_card *card, chip->card = card; chip->irq = -1; - clk_enable(chip->ssc->clk); + retval = clk_enable(chip->ssc->clk); + if (retval) + return retval; retval = request_irq(irq, snd_at73c213_interrupt, 0, "at73c213", chip); if (retval) { @@ -1008,7 +1014,9 @@ static int snd_at73c213_remove(struct spi_device *spi) int retval; /* Stop playback. */ - clk_enable(chip->ssc->clk); + retval = clk_enable(chip->ssc->clk); + if (retval) + goto out; ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS)); clk_disable(chip->ssc->clk); @@ -1088,9 +1096,16 @@ static int snd_at73c213_resume(struct device *dev) { struct snd_card *card = dev_get_drvdata(dev); struct snd_at73c213 *chip = card->private_data; + int retval; - clk_enable(chip->board->dac_clk); - clk_enable(chip->ssc->clk); + retval = clk_enable(chip->board->dac_clk); + if (retval) + return retval; + retval = clk_enable(chip->ssc->clk); + if (retval) { + clk_disable(chip->board->dac_clk); + return retval; + } ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXEN)); return 0; diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 96991ddf5055d..64f5544d0a0aa 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -542,6 +542,16 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x05a7, 0x40fa), .map = bose_soundlink_map, }, + { + /* Corsair Virtuoso SE Latest (wired mode) */ + .id = USB_ID(0x1b1c, 0x0a3f), + .map = corsair_virtuoso_map, + }, + { + /* Corsair Virtuoso SE Latest (wireless mode) */ + .id = USB_ID(0x1b1c, 0x0a40), + .map = corsair_virtuoso_map, + }, { /* Corsair Virtuoso SE (wired mode) */ .id = USB_ID(0x1b1c, 0x0a3d), diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index e447ddd6854cd..d35cf54cab338 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3360,9 +3360,10 @@ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, if (unitid == 7 && cval->control == UAC_FU_VOLUME) snd_dragonfly_quirk_db_scale(mixer, cval, kctl); break; - /* lowest playback value is muted on C-Media devices */ - case USB_ID(0x0d8c, 0x000c): - case USB_ID(0x0d8c, 0x0014): + /* lowest playback value is muted on some devices */ + case USB_ID(0x0d8c, 0x000c): /* C-Media */ + case USB_ID(0x0d8c, 0x0014): /* C-Media */ + case USB_ID(0x19f7, 0x0003): /* RODE NT-USB */ if (strstr(kctl->id.name, "Playback")) cval->min_mute = 1; break; diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 59833125ac0a1..a2c665beda87c 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -902,7 +902,7 @@ static int do_show(int argc, char **argv) equal_fn_for_key_as_id, NULL); btf_map_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!btf_prog_table || 
!btf_map_table) { + if (IS_ERR(btf_prog_table) || IS_ERR(btf_map_table)) { hashmap__free(btf_prog_table); hashmap__free(btf_map_table); if (fd >= 0) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index b4695df2ea3d7..a7387c265e3cf 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -927,7 +927,6 @@ static int do_skeleton(int argc, char **argv) s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ if (!s) \n\ goto err; \n\ - obj->skeleton = s; \n\ \n\ s->sz = sizeof(*s); \n\ s->name = \"%1$s\"; \n\ @@ -1000,6 +999,7 @@ static int do_skeleton(int argc, char **argv) \n\ s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\ \n\ + obj->skeleton = s; \n\ return 0; \n\ err: \n\ bpf_object__destroy_skeleton(s); \n\ diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 2c258db0d3521..97dec81950e5d 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -2,6 +2,7 @@ /* Copyright (C) 2020 Facebook */ #include +#include #include #include #include @@ -306,7 +307,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { link_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!link_table) { + if (IS_ERR(link_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index cc530a2298124..0bba33729c7f0 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -620,17 +620,14 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) u32_as_hash_field(info->id)) printf("\n\tpinned %s", (char *)entry->value); } - printf("\n"); if (frozen_str) { frozen = atoi(frozen_str); free(frozen_str); } - if (!info->btf_id && !frozen) - return 0; - - printf("\t"); + if (info->btf_id || frozen) + printf("\n\t"); if (info->btf_id) printf("btf_id %d", info->btf_id); @@ -699,7 +696,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { map_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!map_table) { + if (IS_ERR(map_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } @@ -805,29 +802,30 @@ static int maps_have_btf(int *fds, int nb_fds) static struct btf *btf_vmlinux; -static struct btf *get_map_kv_btf(const struct bpf_map_info *info) +static int get_map_kv_btf(const struct bpf_map_info *info, struct btf **btf) { - struct btf *btf = NULL; + int err = 0; if (info->btf_vmlinux_value_type_id) { if (!btf_vmlinux) { btf_vmlinux = libbpf_find_kernel_btf(); - if (libbpf_get_error(btf_vmlinux)) + err = libbpf_get_error(btf_vmlinux); + if (err) { p_err("failed to get kernel btf"); + return err; + } } - return btf_vmlinux; + *btf = btf_vmlinux; } else if (info->btf_value_type_id) { - int err; - - btf = btf__load_from_kernel_by_id(info->btf_id); - err = libbpf_get_error(btf); - if (err) { + *btf = btf__load_from_kernel_by_id(info->btf_id); + err = libbpf_get_error(*btf); + if (err) p_err("failed to get btf"); - btf = ERR_PTR(err); - } + } else { + *btf = NULL; } - return btf; + return err; } static void free_map_kv_btf(struct btf *btf) @@ -862,8 +860,7 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, prev_key = NULL; if (wtr) { - btf = get_map_kv_btf(info); - err = libbpf_get_error(btf); + err = get_map_kv_btf(info, &btf); if (err) { goto exit_free; } @@ -1054,11 +1051,8 @@ static void print_key_value(struct bpf_map_info *info, void *key, json_writer_t *btf_wtr; struct btf *btf; - btf = btf__load_from_kernel_by_id(info->btf_id); - if 
(libbpf_get_error(btf)) { - p_err("failed to get btf"); + if (get_map_kv_btf(info, &btf)) return; - } if (json_output) { print_entry_json(info, key, value, btf); diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 56b598eee043a..7c384d10e95f8 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2020 Facebook */ #include +#include #include #include #include @@ -101,7 +102,7 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) libbpf_print_fn_t default_print; *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!*map) { + if (IS_ERR(*map)) { p_err("failed to create hashmap for PID references"); return -1; } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2a21d50516bc4..33ca834d5f510 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -641,7 +641,7 @@ static int do_show(int argc, char **argv) if (show_pinned) { prog_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); - if (!prog_table) { + if (IS_ERR(prog_table)) { p_err("failed to create hashmap for pinned paths"); return -1; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b0383d371b9af..49340175feb94 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2286,8 +2286,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if current task belongs to the cgroup2. - * * 1, if current task does not belong to the cgroup2. + * * 1, if current task belongs to the cgroup2. + * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 90f56b0f585f0..e1b5056068828 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -206,10 +206,10 @@ #define __PT_PARM4_REG a3 #define __PT_PARM5_REG a4 #define __PT_RET_REG ra -#define __PT_FP_REG fp +#define __PT_FP_REG s0 #define __PT_RC_REG a5 #define __PT_SP_REG sp -#define __PT_IP_REG epc +#define __PT_IP_REG pc #endif diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 061839f045255..51862fdee850b 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -375,8 +375,28 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id, const struct btf_dump_type_data_opts *opts); /* - * A set of helpers for easier BTF types handling + * A set of helpers for easier BTF types handling. + * + * The inline functions below rely on constants from the kernel headers which + * may not be available for applications including this header file. To avoid + * compilation errors, we define all the constants here that were added after + * the initial introduction of the BTF_KIND* constants. 
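The bpftool fixes in btf.c, link.c, map.c, pids.c, and prog.c all correct the same mistake: in this libbpf version hashmap__new() reports failure as ERR_PTR(-ENOMEM), never NULL, so the old `if (!table)` check could not trigger. A sketch of the corrected idiom — `init_table` is hypothetical, while the hash helpers and tools-tree err.h are the ones used in the hunks above::

    #include <errno.h>
    #include <linux/err.h>  /* tools/include copy: IS_ERR() */
    #include "hashmap.h"

    static struct hashmap *table;

    static int init_table(void)
    {
            table = hashmap__new(hash_fn_for_key_as_id,
                                 equal_fn_for_key_as_id, NULL);
            if (IS_ERR(table)) {    /* a NULL check would miss the error */
                    table = NULL;
                    return -ENOMEM;
            }
            return 0;
    }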
*/ +#ifndef BTF_KIND_FUNC +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#endif +#ifndef BTF_KIND_VAR +#define BTF_KIND_VAR 14 /* Variable */ +#define BTF_KIND_DATASEC 15 /* Section */ +#endif +#ifndef BTF_KIND_FLOAT +#define BTF_KIND_FLOAT 16 /* Floating point */ +#endif +/* The kernel header switched to enums, so these two were never #defined */ +#define BTF_KIND_DECL_TAG 17 /* Decl Tag */ +#define BTF_KIND_TYPE_TAG 18 /* Type Tag */ + static inline __u16 btf_kind(const struct btf_type *t) { return BTF_INFO_KIND(t->info); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index b9a3260c83cbd..6b1bc1f43728c 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1505,6 +1505,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, if (s->name_resolved) return *cached_name ? *cached_name : orig_name; + if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) { + s->name_resolved = 1; + return orig_name; + } + dup_cnt = btf_dump_name_dups(d, name_map, orig_name); if (dup_cnt > 1) { const size_t max_len = 256; @@ -1861,14 +1866,16 @@ static int btf_dump_array_data(struct btf_dump *d, { const struct btf_array *array = btf_array(t); const struct btf_type *elem_type; - __u32 i, elem_size = 0, elem_type_id; + __u32 i, elem_type_id; + __s64 elem_size; bool is_array_member; elem_type_id = array->type; elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); elem_size = btf__resolve_size(d->btf, elem_type_id); if (elem_size <= 0) { - pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id); + pr_warn("unexpected elem size %zd for array type [%u]\n", + (ssize_t)elem_size, id); return -EINVAL; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7f10dd501a52b..94a6a8543cbc9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4854,7 +4854,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b LIBBPF_OPTS(bpf_map_create_opts, create_attr); struct bpf_map_def *def = &map->def; const char *map_name = NULL; - __u32 max_entries; int err = 0; if (kernel_supports(obj, FEAT_PROG_NAME)) @@ -4864,21 +4863,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; - if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { - int nr_cpus; - - nr_cpus = libbpf_num_possible_cpus(); - if (nr_cpus < 0) { - pr_warn("map '%s': failed to determine number of system CPUs: %d\n", - map->name, nr_cpus); - return nr_cpus; - } - pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); - max_entries = nr_cpus; - } else { - max_entries = def->max_entries; - } - if (bpf_map__is_struct_ops(map)) create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -4928,7 +4912,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (obj->gen_loader) { bpf_gen__map_create(obj->gen_loader, def->type, map_name, - def->key_size, def->value_size, max_entries, + def->key_size, def->value_size, def->max_entries, &create_attr, is_inner ? -1 : map - obj->maps); /* Pretend to have valid FD to pass various fd >= 0 checks. * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. 
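
Aside: the btf.h hunk above relies on a common compatibility pattern, namely macro definitions guarded by #ifndef, so the header still compiles against older kernel UAPI headers that predate the newer BTF kinds. Below is a minimal standalone sketch of the same idea; the is_float_kind() helper is hypothetical, and only the BTF_KIND_FLOAT value mirrors the hunk above.

/* Sketch of the #ifndef-guarded fallback pattern used in btf.h above.
 * If an included header already defines BTF_KIND_FLOAT, the fallback is
 * skipped; otherwise the hardcoded UAPI value keeps older toolchains
 * compiling. Build with: cc -o demo demo.c
 */
#include <stdio.h>

#ifndef BTF_KIND_FLOAT
#define BTF_KIND_FLOAT 16	/* Floating point (fallback for old headers) */
#endif

/* Hypothetical helper: behaves identically whether the constant came
 * from a system header or from the fallback definition above.
 */
static int is_float_kind(int kind)
{
	return kind == BTF_KIND_FLOAT;
}

int main(void)
{
	printf("BTF_KIND_FLOAT=%d is_float_kind(16)=%d\n",
	       BTF_KIND_FLOAT, is_float_kind(16));
	return 0;
}
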
@@ -4937,7 +4921,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b } else { map->fd = bpf_map_create(def->type, map_name, def->key_size, def->value_size, - max_entries, &create_attr); + def->max_entries, &create_attr); } if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { @@ -4954,7 +4938,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b map->btf_value_type_id = 0; map->fd = bpf_map_create(def->type, map_name, def->key_size, def->value_size, - max_entries, &create_attr); + def->max_entries, &create_attr); } err = map->fd < 0 ? -errno : 0; @@ -5058,6 +5042,24 @@ static int bpf_object_init_prog_arrays(struct bpf_object *obj) return 0; } +static int map_set_def_max_entries(struct bpf_map *map) +{ + if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { + int nr_cpus; + + nr_cpus = libbpf_num_possible_cpus(); + if (nr_cpus < 0) { + pr_warn("map '%s': failed to determine number of system CPUs: %d\n", + map->name, nr_cpus); + return nr_cpus; + } + pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); + map->def.max_entries = nr_cpus; + } + + return 0; +} + static int bpf_object__create_maps(struct bpf_object *obj) { @@ -5090,6 +5092,10 @@ bpf_object__create_maps(struct bpf_object *obj) continue; } + err = map_set_def_max_entries(map); + if (err) + goto err_out; + retried = false; retry: if (map->pin_path) { @@ -11795,6 +11801,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) { + if (!s) + return; + if (s->progs) bpf_object__detach_skeleton(s); if (s->obj) diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 5297839677930..9a89fdfe4987e 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -431,4 +431,4 @@ LIBBPF_0.7.0 { libbpf_probe_bpf_map_type; libbpf_probe_bpf_prog_type; libbpf_set_memlock_rlim_max; -}; +} LIBBPF_0.6.0; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 39f25e09b51e2..fadde7d80a51c 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -87,29 +87,75 @@ enum { NL_DONE, }; +static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags) +{ + int len; + + do { + len = recvmsg(sock, mhdr, flags); + } while (len < 0 && (errno == EINTR || errno == EAGAIN)); + + if (len < 0) + return -errno; + return len; +} + +static int alloc_iov(struct iovec *iov, int len) +{ + void *nbuf; + + nbuf = realloc(iov->iov_base, len); + if (!nbuf) + return -ENOMEM; + + iov->iov_base = nbuf; + iov->iov_len = len; + return 0; +} + static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, void *cookie) { + struct iovec iov = {}; + struct msghdr mhdr = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; bool multipart = true; struct nlmsgerr *err; struct nlmsghdr *nh; - char buf[4096]; int len, ret; + ret = alloc_iov(&iov, 4096); + if (ret) + goto done; + while (multipart) { start: multipart = false; - len = recv(sock, buf, sizeof(buf), 0); + len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC); + if (len < 0) { + ret = len; + goto done; + } + + if (len > iov.iov_len) { + ret = alloc_iov(&iov, len); + if (ret) + goto done; + } + + len = netlink_recvmsg(sock, &mhdr, 0); if (len < 0) { - ret = -errno; + ret = len; goto done; } if (len == 0) break; - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len); nh = 
NLMSG_NEXT(nh, len)) { if (nh->nlmsg_pid != nl_pid) { ret = -LIBBPF_ERRNO__WRNGPID; @@ -130,7 +176,8 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, libbpf_nla_dump_errormsg(nh); goto done; case NLMSG_DONE: - return 0; + ret = 0; + goto done; default: break; } @@ -142,15 +189,17 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, case NL_NEXT: goto start; case NL_DONE: - return 0; + ret = 0; + goto done; default: - return ret; + goto done; } } } } ret = 0; done: + free(iov.iov_base); return ret; } diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index edafe56664f3a..32a2f5749c711 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -1193,12 +1193,23 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, int xsk_umem__delete(struct xsk_umem *umem) { + struct xdp_mmap_offsets off; + int err; + if (!umem) return 0; if (umem->refcount) return -EBUSY; + err = xsk_get_mmap_offsets(umem->fd, &off); + if (!err && umem->fill_save && umem->comp_save) { + munmap(umem->fill_save->ring - off.fr.desc, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(umem->comp_save->ring - off.cr.desc, + off.cr.desc + umem->config.comp_size * sizeof(__u64)); + } + close(umem->fd); free(umem); diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index fa854c83b7e7b..ed616fc19b4f2 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -69,7 +69,7 @@ static int test_stat_cpu(void) perf_evlist__set_maps(evlist, cpus, NULL); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { cpus = perf_evsel__cpus(evsel); @@ -130,7 +130,7 @@ static int test_stat_thread(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { perf_evsel__read(evsel, 0, 0, &counts); @@ -187,7 +187,7 @@ static int test_stat_thread_enable(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { perf_evsel__read(evsel, 0, 0, &counts); @@ -507,7 +507,7 @@ static int test_stat_multiplexing(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__enable(evlist); diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index 8d9b55959256a..cfc208d71f00a 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -20,17 +20,27 @@ int arch_evlist__add_default_attrs(struct evlist *evlist) struct evsel *arch_evlist__leader(struct list_head *list) { - struct evsel *evsel, *first; + struct evsel *evsel, *first, *slots = NULL; + bool has_topdown = false; first = list_first_entry(list, struct evsel, core.node); if (!pmu_have_event("cpu", "slots")) return first; + /* If there is a slots event and a topdown event then the slots event comes first. 
*/ __evlist__for_each_entry(list, evsel) { - if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && - evsel->name && strcasestr(evsel->name, "slots")) - return evsel; + if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && evsel->name) { + if (strcasestr(evsel->name, "slots")) { + slots = evsel; + if (slots == first) + return first; + } + if (!strncasecmp(evsel->name, "topdown", 7)) + has_topdown = true; + if (slots && has_topdown) + return slots; + } } return first; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3f98689dd6878..60baa3dadc4b6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -955,10 +955,10 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) * Enable counters and exec the command: */ if (forks) { - evlist__start_workload(evsel_list); err = enable_counters(); if (err) return -1; + evlist__start_workload(evsel_list); t0 = rdclock(); clock_gettime(CLOCK_MONOTONIC, &ref_time); diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json index 9ff67206ade4e..821d2f2a8f251 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json @@ -314,6 +314,19 @@ "SampleAfterValue": "2000003", "UMask": "0x82" }, + { + "BriefDescription": "All retired memory instructions.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.ANY", + "L1_Hit_Indication": "1", + "PEBS": "1", + "PublicDescription": "Counts all retired memory instructions - loads and stores.", + "SampleAfterValue": "2000003", + "UMask": "0x83" + }, { "BriefDescription": "Retired load instructions with locked access.", "Counter": "0,1,2,3", @@ -358,6 +371,7 @@ "EventCode": "0xD0", "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", "PEBS": "1", + "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).", "SampleAfterValue": "100003", "UMask": "0x11" }, @@ -370,6 +384,7 @@ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", "L1_Hit_Indication": "1", "PEBS": "1", + "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).", "SampleAfterValue": "100003", "UMask": "0x12" }, @@ -733,7 +748,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010491", + "MSRValue": "0x10491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -772,7 +787,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0491", + "MSRValue": "0x4003C0491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -785,7 +800,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0491", + "MSRValue": "0x1003C0491", "Offcore": "1", "PublicDescription": "Offcore 
response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -798,7 +813,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0491", + "MSRValue": "0x8003C0491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -811,7 +826,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010490", + "MSRValue": "0x10490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -850,7 +865,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0490", + "MSRValue": "0x4003C0490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -863,7 +878,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0490", + "MSRValue": "0x1003C0490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -876,7 +891,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0490", + "MSRValue": "0x8003C0490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -889,7 +904,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010120", + "MSRValue": "0x10120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -928,7 +943,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0120", + "MSRValue": "0x4003C0120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific 
event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -941,7 +956,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0120", + "MSRValue": "0x1003C0120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -954,7 +969,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0120", + "MSRValue": "0x8003C0120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -967,7 +982,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010122", + "MSRValue": "0x10122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1006,7 +1021,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0122", + "MSRValue": "0x4003C0122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1019,7 +1034,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0122", + "MSRValue": "0x1003C0122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1032,7 +1047,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0122", + "MSRValue": "0x8003C0122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1045,7 +1060,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010004", + "MSRValue": "0x10004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", 
"SampleAfterValue": "100003", @@ -1084,7 +1099,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0004", + "MSRValue": "0x4003C0004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1097,7 +1112,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0004", + "MSRValue": "0x1003C0004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1110,7 +1125,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0004", + "MSRValue": "0x8003C0004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1123,7 +1138,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010001", + "MSRValue": "0x10001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1162,7 +1177,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0001", + "MSRValue": "0x4003C0001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1175,7 +1190,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0001", + "MSRValue": "0x1003C0001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1188,7 +1203,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0001", + "MSRValue": "0x8003C0001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1201,7 +1216,7 @@ "EventCode": 
"0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010002", + "MSRValue": "0x10002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1240,7 +1255,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0002", + "MSRValue": "0x4003C0002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1253,7 +1268,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0002", + "MSRValue": "0x1003C0002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1266,7 +1281,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0002", + "MSRValue": "0x8003C0002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1279,7 +1294,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010400", + "MSRValue": "0x10400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1318,7 +1333,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0400", + "MSRValue": "0x4003C0400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1331,7 +1346,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0400", + "MSRValue": "0x1003C0400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1344,7 +1359,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": 
"0x1a6,0x1a7", - "MSRValue": "0x08003C0400", + "MSRValue": "0x8003C0400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1357,7 +1372,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010010", + "MSRValue": "0x10010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1396,7 +1411,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0010", + "MSRValue": "0x4003C0010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1409,7 +1424,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0010", + "MSRValue": "0x1003C0010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1422,7 +1437,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0010", + "MSRValue": "0x8003C0010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1435,7 +1450,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010020", + "MSRValue": "0x10020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1474,7 +1489,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0020", + "MSRValue": "0x4003C0020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1487,7 +1502,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0020", + "MSRValue": "0x1003C0020", "Offcore": "1", 
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1500,7 +1515,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0020", + "MSRValue": "0x8003C0020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1513,7 +1528,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010080", + "MSRValue": "0x10080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1552,7 +1567,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0080", + "MSRValue": "0x4003C0080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1565,7 +1580,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0080", + "MSRValue": "0x1003C0080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1578,7 +1593,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0080", + "MSRValue": "0x8003C0080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1591,7 +1606,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0000010100", + "MSRValue": "0x10100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1630,7 +1645,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x04003C0100", + "MSRValue": "0x4003C0100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event 
select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1643,7 +1658,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x01003C0100", + "MSRValue": "0x1003C0100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1656,7 +1671,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x08003C0100", + "MSRValue": "0x8003C0100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json index 503737ed3a83c..9e873ab224502 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json @@ -1,73 +1,81 @@ [ { - "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", + "PublicDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x4" }, { - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instruction retired. 
Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "PublicDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x8" }, { - "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", + "PublicDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x10" }, { - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "PublicDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. 
The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x20" }, { - "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.", + "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE", + "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x40" }, { - "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.", + "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE", + "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. 
The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x80" }, { - "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "PublicDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x1" }, { - "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "BriefDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "PublicDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. 
The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", "SampleAfterValue": "2000003", "UMask": "0x2" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json index 078706a500919..ecce4273ae52c 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json @@ -30,7 +30,21 @@ "UMask": "0x2" }, { - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", + "BriefDescription": "Retired Instructions who experienced DSB miss.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x1", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired Instructions who experienced a critical DSB miss.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xC6", @@ -38,7 +52,7 @@ "MSRIndex": "0x3F7", "MSRValue": "0x11", "PEBS": "1", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", + "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.", "SampleAfterValue": "100007", "TakenAlone": "1", "UMask": "0x1" diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json index 6f29b02fa320c..60c286b4fe54c 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json @@ -299,7 +299,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00491", + "MSRValue": "0x83FC00491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -312,7 +312,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00491", + "MSRValue": "0x63FC00491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -325,7 +325,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000491", + "MSRValue": "0x604000491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -338,7 +338,7 @@ "EventCode": "0xB7, 0xBB", 
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800491", + "MSRValue": "0x63B800491", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -377,7 +377,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00490", + "MSRValue": "0x83FC00490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -390,7 +390,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00490", + "MSRValue": "0x63FC00490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -403,7 +403,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000490", + "MSRValue": "0x604000490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -416,7 +416,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800490", + "MSRValue": "0x63B800490", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -455,7 +455,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00120", + "MSRValue": "0x83FC00120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -468,7 +468,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00120", + "MSRValue": "0x63FC00120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -481,7 +481,7 @@ "EventCode": "0xB7, 0xBB", "EventName": 
"OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000120", + "MSRValue": "0x604000120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -494,7 +494,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800120", + "MSRValue": "0x63B800120", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -533,7 +533,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00122", + "MSRValue": "0x83FC00122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -546,7 +546,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00122", + "MSRValue": "0x63FC00122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -559,7 +559,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000122", + "MSRValue": "0x604000122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -572,7 +572,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800122", + "MSRValue": "0x63B800122", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -611,7 +611,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00004", + "MSRValue": "0x83FC00004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -624,7 +624,7 @@ "EventCode": "0xB7, 0xBB", "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00004", + "MSRValue": "0x63FC00004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -637,7 +637,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000004", + "MSRValue": "0x604000004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -650,7 +650,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800004", + "MSRValue": "0x63B800004", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -689,7 +689,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00001", + "MSRValue": "0x83FC00001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -702,7 +702,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00001", + "MSRValue": "0x63FC00001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -715,7 +715,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000001", + "MSRValue": "0x604000001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -728,7 +728,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800001", + "MSRValue": "0x63B800001", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -767,7 +767,7 @@ "EventCode": "0xB7, 0xBB", 
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00002", + "MSRValue": "0x83FC00002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -780,7 +780,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00002", + "MSRValue": "0x63FC00002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -793,7 +793,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000002", + "MSRValue": "0x604000002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -806,7 +806,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800002", + "MSRValue": "0x63B800002", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -845,7 +845,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00400", + "MSRValue": "0x83FC00400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -858,7 +858,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00400", + "MSRValue": "0x63FC00400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -871,7 +871,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000400", + "MSRValue": "0x604000400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -884,7 +884,7 @@ "EventCode": "0xB7, 0xBB", "EventName": 
"OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800400", + "MSRValue": "0x63B800400", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -923,7 +923,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00010", + "MSRValue": "0x83FC00010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -936,7 +936,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00010", + "MSRValue": "0x63FC00010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -949,7 +949,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000010", + "MSRValue": "0x604000010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -962,7 +962,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800010", + "MSRValue": "0x63B800010", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1001,7 +1001,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00020", + "MSRValue": "0x83FC00020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1014,7 +1014,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00020", + "MSRValue": "0x63FC00020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1027,7 +1027,7 @@ "EventCode": "0xB7, 0xBB", "EventName": 
"OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000020", + "MSRValue": "0x604000020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1040,7 +1040,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800020", + "MSRValue": "0x63B800020", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1079,7 +1079,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00080", + "MSRValue": "0x83FC00080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1092,7 +1092,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00080", + "MSRValue": "0x63FC00080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1105,7 +1105,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000080", + "MSRValue": "0x604000080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1118,7 +1118,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800080", + "MSRValue": "0x63B800080", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1157,7 +1157,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x083FC00100", + "MSRValue": "0x83FC00100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1170,7 +1170,7 @@ "EventCode": "0xB7, 0xBB", "EventName": 
"OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063FC00100", + "MSRValue": "0x63FC00100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1183,7 +1183,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0604000100", + "MSRValue": "0x604000100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", @@ -1196,7 +1196,7 @@ "EventCode": "0xB7, 0xBB", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x063B800100", + "MSRValue": "0x63B800100", "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json index ca57481206660..12eabae3e2242 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -435,6 +435,17 @@ "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", "SampleAfterValue": "2000003" }, + { + "BriefDescription": "Number of all retired NOP instructions.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.NOP", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, { "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", "Counter": "1", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 863c9e103969e..b016f7d1ff3de 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -1,26 +1,167 @@ [ + { + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. 
Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." + }, + { + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + }, + { + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example." + }, + { + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU." 
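
The non-SMT TopdownL1 expressions here (Frontend_Bound, Bad_Speculation, and the Backend_Bound/Retiring entries below) all divide by the same slot count, 4 * CPU_CLK_UNHALTED.THREAD. A hedged sketch of that level-1 breakdown under hypothetical counter readings; by construction the four fractions sum to exactly 1:

```python
# Illustrative sketch of the non-SMT TopdownL1 MetricExprs shown in this file.
# Counter values are made up for the example; keys mirror the event names.
counters = {
    "CPU_CLK_UNHALTED.THREAD": 1_000_000,
    "IDQ_UOPS_NOT_DELIVERED.CORE": 600_000,
    "UOPS_ISSUED.ANY": 2_800_000,
    "UOPS_RETIRED.RETIRE_SLOTS": 2_500_000,
    "INT_MISC.RECOVERY_CYCLES": 20_000,
}

slots = 4 * counters["CPU_CLK_UNHALTED.THREAD"]  # 4 issue slots per cycle

frontend_bound = counters["IDQ_UOPS_NOT_DELIVERED.CORE"] / slots
bad_speculation = (counters["UOPS_ISSUED.ANY"]
                   - counters["UOPS_RETIRED.RETIRE_SLOTS"]
                   + 4 * counters["INT_MISC.RECOVERY_CYCLES"]) / slots
retiring = counters["UOPS_RETIRED.RETIRE_SLOTS"] / slots
# Backend_Bound is defined as the remainder, exactly as in its MetricExpr.
backend_bound = 1 - frontend_bound - (counters["UOPS_ISSUED.ANY"]
                                      + 4 * counters["INT_MISC.RECOVERY_CYCLES"]) / slots

assert abs(frontend_bound + bad_speculation + backend_bound + retiring - 1.0) < 1e-9
```
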
+ }, + { + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricConstraint": "NO_NMI_WATCHDOG", + "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound." + }, + { + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + }, + { + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. " + }, + { + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU." + }, + { + "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", + "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) )", + "MetricGroup": "Bad;BadSpec;BrMispredicts", + "MetricName": "Mispredictions" + }, + { + "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", + "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )", + "MetricGroup": "Bad;BadSpec;BrMispredicts_SMT", + "MetricName": "Mispredictions_SMT" + }, + { + "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( 
(MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (OFFCORE_REQUESTS_BUFFER.SQ_FULL / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ", + "MetricGroup": "Mem;MemoryBW;Offcore", + "MetricName": "Memory_Bandwidth" + }, + { + "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + 
CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) 
) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ", + "MetricGroup": "Mem;MemoryBW;Offcore_SMT", + "MetricName": "Memory_Bandwidth_SMT" + }, + { + "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - 
CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) )", + "MetricGroup": "Mem;MemoryLat;Offcore", + "MetricName": "Memory_Latency" + }, + { + "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - 
CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) )", + "MetricGroup": "Mem;MemoryLat;Offcore_SMT", + "MetricName": "Memory_Latency_SMT" + }, + { + "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( 
CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / CPU_CLK_UNHALTED.THREAD) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ", + "MetricGroup": "Mem;MemoryTLB", + "MetricName": "Memory_Data_TLBs" + }, + { + "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", + "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / 
CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ", + "MetricGroup": "Mem;MemoryTLB;_SMT", + "MetricName": "Memory_Data_TLBs_SMT" + }, + { + "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)", + "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * CPU_CLK_UNHALTED.THREAD))", + "MetricGroup": "Ret", + "MetricName": "Branching_Overhead" + }, + { + "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)", + "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricGroup": "Ret_SMT", + "MetricName": "Branching_Overhead_SMT" + }, + { + "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", + "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))", + "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB", + "MetricName": "Big_Code" + }, + { + "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", + "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB_SMT", + "MetricName": "Big_Code_SMT" + }, + { + "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", + "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) 
+ (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))", + "MetricGroup": "Fed;FetchBW;Frontend", + "MetricName": "Instruction_Fetch_BW" + }, + { + "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", + "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))", + "MetricGroup": "Fed;FetchBW;Frontend_SMT", + "MetricName": "Instruction_Fetch_BW_SMT" + }, { "BriefDescription": "Instructions Per Cycle (per Logical Processor)", "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "Summary", + "MetricGroup": "Ret;Summary", "MetricName": "IPC" }, { "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline;Retire", + "MetricGroup": "Pipeline;Ret;Retire", "MetricName": "UPI" }, { "BriefDescription": "Instruction per taken branch", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", - "MetricGroup": "Branches;FetchBW;PGO", - "MetricName": "IpTB" + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN", + "MetricGroup": "Branches;Fed;FetchBW", + "MetricName": "UpTB" }, { "BriefDescription": "Cycles Per Instruction (per Logical Processor)", "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)", - "MetricGroup": "Pipeline", + "MetricGroup": "Pipeline;Mem", "MetricName": "CPI" }, { @@ -30,39 +171,84 @@ "MetricName": "CLKS" }, { - "BriefDescription": "Instructions Per Cycle (per physical core)", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", + "MetricExpr": "4 * CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TmaL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", + "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "TmaL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "BriefDescription": "The ratio of Executed- by Issued-Uops", + "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY", + "MetricGroup": "Cor;Pipeline", + 
"MetricName": "Execute_per_Issue", + "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage." + }, + { + "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)", "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "SMT;TmaL1", + "MetricGroup": "Ret;SMT;TmaL1", "MetricName": "CoreIPC" }, { - "BriefDescription": "Instructions Per Cycle (per physical core)", + "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)", "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", - "MetricGroup": "SMT;TmaL1", + "MetricGroup": "Ret;SMT;TmaL1_SMT", "MetricName": "CoreIPC_SMT" }, { "BriefDescription": "Floating Point Operations Per Cycle", "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "Flops", + "MetricGroup": "Ret;Flops", "MetricName": "FLOPc" }, { "BriefDescription": "Floating Point Operations Per Cycle", "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", - "MetricGroup": "Flops_SMT", + "MetricGroup": "Ret;Flops_SMT", "MetricName": "FLOPc_SMT" }, + { + "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width)", + "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * CPU_CLK_UNHALTED.THREAD )", + "MetricGroup": "Cor;Flops;HPC", + "MetricName": "FP_Arith_Utilization", + "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting." + }, + { + "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", + "MetricGroup": "Cor;Flops;HPC_SMT", + "MetricName": "FP_Arith_Utilization_SMT", + "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting. SMT version; use when SMT is enabled and measuring per logical CPU." + }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", - "MetricGroup": "Pipeline;PortsUtil", + "MetricGroup": "Backend;Cor;Pipeline;PortsUtil", "MetricName": "ILP" }, + { + "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", + "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) * (4 * CPU_CLK_UNHALTED.THREAD) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;BrMispredicts", + "MetricName": "Branch_Misprediction_Cost" + }, + { + "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", + "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;BrMispredicts_SMT", + "MetricName": "Branch_Misprediction_Cost_SMT" + }, { "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", - "MetricGroup": "BrMispredicts", + "MetricGroup": 
"Bad;BadSpec;BrMispredicts", "MetricName": "IpMispredict" }, { @@ -86,122 +272,249 @@ { "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", - "MetricGroup": "Branches;InsType", + "MetricGroup": "Branches;Fed;InsType", "MetricName": "IpBranch" }, { "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", - "MetricGroup": "Branches", + "MetricGroup": "Branches;Fed;PGO", "MetricName": "IpCall" }, + { + "BriefDescription": "Instruction per taken branch", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO", + "MetricName": "IpTB" + }, { "BriefDescription": "Branch instructions per taken branch. ", "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", - "MetricGroup": "Branches;PGO", + "MetricGroup": "Branches;Fed;PGO", "MetricName": "BpTkBranch" }, { "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )", - "MetricGroup": "Flops;FpArith;InsType", + "MetricGroup": "Flops;InsType", "MetricName": "IpFLOP" }, + { + "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) )", + "MetricGroup": "Flops;InsType", + "MetricName": "IpArith", + "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW." + }, + { + "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "MetricGroup": "Flops;FpScalar;InsType", + "MetricName": "IpArith_Scalar_SP", + "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." + }, + { + "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "MetricGroup": "Flops;FpScalar;InsType", + "MetricName": "IpArith_Scalar_DP", + "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." 
+ }, + { + "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE )", + "MetricGroup": "Flops;FpVector;InsType", + "MetricName": "IpArith_AVX128", + "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." + }, + { + "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )", + "MetricGroup": "Flops;FpVector;InsType", + "MetricName": "IpArith_AVX256", + "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." + }, + { + "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )", + "MetricGroup": "Flops;FpVector;InsType", + "MetricName": "IpArith_AVX512", + "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." + }, { "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST", "MetricExpr": "INST_RETIRED.ANY", "MetricGroup": "Summary;TmaL1", "MetricName": "Instructions" }, + { + "BriefDescription": "Average number of Uops issued by front-end when it issued something", + "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@", + "MetricGroup": "Fed;FetchBW", + "MetricName": "Fetch_UpC" + }, { "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", - "MetricGroup": "DSB;FetchBW", + "MetricGroup": "DSB;Fed;FetchBW", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", + "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.", + "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / CPU_CLK_UNHALTED.THREAD / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))", + "MetricGroup": "DSBmiss;Fed", + "MetricName": "DSB_Misses_Cost" + }, + { + "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.", + "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / 
CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))", + "MetricGroup": "DSBmiss;Fed_SMT", + "MetricName": "DSB_Misses_Cost_SMT" + }, + { + "BriefDescription": "Number of Instructions per non-speculative DSB miss", + "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS", + "MetricGroup": "DSBmiss;Fed", + "MetricName": "IpDSB_Miss_Ret" + }, + { + "BriefDescription": "Fraction of branches that are non-taken conditionals", + "MetricExpr": "BR_INST_RETIRED.NOT_TAKEN / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches;CodeGen;PGO", + "MetricName": "Cond_NT" + }, + { + "BriefDescription": "Fraction of branches that are taken conditionals", + "MetricExpr": "( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches;CodeGen;PGO", + "MetricName": "Cond_TK" + }, + { + "BriefDescription": "Fraction of branches that are CALL or RET", + "MetricExpr": "( BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN ) / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches", + "MetricName": "CallRet" + }, + { + "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps", + "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES", + "MetricGroup": "Bad;Branches", + "MetricName": "Jump" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", - "MetricGroup": "MemoryBound;MemoryLat", - "MetricName": "Load_Miss_Real_Latency" + "MetricGroup": "Mem;MemoryBound;MemoryLat", + "MetricName": "Load_Miss_Real_Latency", + "PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings." }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-Logical Processor)", "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", - "MetricGroup": "MemoryBound;MemoryBW", + "MetricGroup": "Mem;MemoryBound;MemoryBW", "MetricName": "MLP" }, - { - "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricConstraint": "NO_NMI_WATCHDOG", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CORE_CLKS )", - "MetricGroup": "MemoryTLB", - "MetricName": "Page_Walks_Utilization" - }, { "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", - "MetricGroup": "MemoryBW", + "MetricGroup": "Mem;MemoryBW", "MetricName": "L1D_Cache_Fill_BW" }, { "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", - "MetricGroup": "MemoryBW", + "MetricGroup": "Mem;MemoryBW", "MetricName": "L2_Cache_Fill_BW" }, { "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", - "MetricGroup": "MemoryBW", + "MetricGroup": "Mem;MemoryBW", "MetricName": "L3_Cache_Fill_BW" }, { "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", - "MetricGroup": "MemoryBW;Offcore", + "MetricGroup": "Mem;MemoryBW;Offcore", "MetricName": "L3_Cache_Access_BW" }, { "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", - "MetricGroup": "CacheMisses", + "MetricGroup": "Mem;CacheMisses", "MetricName": "L1MPKI" }, + { + "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)", + "MetricExpr": "1000 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY", + "MetricGroup": "Mem;CacheMisses", + "MetricName": "L1MPKI_Load" + }, { "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", - "MetricGroup": "CacheMisses", + "MetricGroup": "Mem;Backend;CacheMisses", "MetricName": "L2MPKI" }, { "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", - "MetricGroup": "CacheMisses;Offcore", + "MetricGroup": "Mem;CacheMisses;Offcore", "MetricName": "L2MPKI_All" }, + { + "BriefDescription": "L2 cache misses per kilo instruction for all demand loads (including speculative)", + "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY", + "MetricGroup": "Mem;CacheMisses", + "MetricName": "L2MPKI_Load" + }, { "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", - "MetricGroup": "CacheMisses", + "MetricGroup": "Mem;CacheMisses", "MetricName": "L2HPKI_All" }, + { + "BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)", + "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY", + "MetricGroup": "Mem;CacheMisses", + "MetricName": "L2HPKI_Load" + }, { "BriefDescription": "L3 cache true misses per 
kilo instruction for retired demand loads", "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", - "MetricGroup": "CacheMisses", + "MetricGroup": "Mem;CacheMisses", "MetricName": "L3MPKI" }, + { + "BriefDescription": "Fill Buffer (FB) true hits per kilo instructions for retired demand loads", + "MetricExpr": "1000 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY", + "MetricGroup": "Mem;CacheMisses", + "MetricName": "FB_HPKI" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricConstraint": "NO_NMI_WATCHDOG", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CPU_CLK_UNHALTED.THREAD )", + "MetricGroup": "Mem;MemoryTLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", + "MetricGroup": "Mem;MemoryTLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, { "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)", "MetricExpr": "1000 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY", - "MetricGroup": "L2Evicts;Server", + "MetricGroup": "L2Evicts;Mem;Server", "MetricName": "L2_Evictions_Silent_PKI" }, { "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction", "MetricExpr": "1000 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY", - "MetricGroup": "L2Evicts;Server", + "MetricGroup": "L2Evicts;Mem;Server", "MetricName": "L2_Evictions_NonSilent_PKI" }, { @@ -219,7 +532,7 @@ { "BriefDescription": "Giga Floating Point Operations Per Second", "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time", - "MetricGroup": "Flops;HPC", + "MetricGroup": "Cor;Flops;HPC", "MetricName": "GFLOPs" }, { @@ -228,6 +541,48 @@ "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0", + "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Power", + "MetricName": "Power_License0_Utilization", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes." + }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "Power_SMT", + "MetricName": "Power_License0_Utilization_SMT", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes. SMT version; use when SMT is enabled and measuring per logical CPU." + }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1", + "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Power", + "MetricName": "Power_License1_Utilization", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions." + }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "Power_SMT", + "MetricName": "Power_License1_Utilization_SMT", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU." + }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)", + "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Power", + "MetricName": "Power_License2_Utilization", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions." + }, + { + "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "Power_SMT", + "MetricName": "Power_License2_Utilization_SMT", + "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU." 
+ }, { "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", @@ -240,34 +595,46 @@ "MetricGroup": "OS", "MetricName": "Kernel_Utilization" }, + { + "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k", + "MetricGroup": "OS", + "MetricName": "Kernel_CPI" + }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", - "MetricGroup": "HPC;MemoryBW;SoC", + "MetricGroup": "HPC;Mem;MemoryBW;SoC", "MetricName": "DRAM_BW_Use" }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", - "MetricGroup": "MemoryLat;SoC", + "MetricGroup": "Mem;MemoryLat;SoC", "MetricName": "MEM_Read_Latency" }, { "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@", - "MetricGroup": "MemoryBW;SoC", + "MetricGroup": "Mem;MemoryBW;SoC", "MetricName": "MEM_Parallel_Reads" }, + { + "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches", + "MetricExpr": "1000000000 * ( UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS ) / imc_0@event\\=0x0@", + "MetricGroup": "Mem;MemoryLat;SoC;Server", + "MetricName": "MEM_DRAM_Read_Latency" + }, { "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]", "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time", - "MetricGroup": "IoBW;SoC;Server", + "MetricGroup": "IoBW;Mem;SoC;Server", "MetricName": "IO_Write_BW" }, { "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]", "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time", - "MetricGroup": "IoBW;SoC;Server", + "MetricGroup": "IoBW;Mem;SoC;Server", "MetricName": "IO_Read_BW" }, { diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json index 6ed92bc5c129b..06c5ca26ca3f3 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json @@ -537,6 +537,18 @@ "PublicDescription": "Counts clockticks of the 1GHz trafiic controller clock in the IIO unit.", "Unit": "IIO" }, + { + "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0-3", + "Counter": "0,1,2,3", + "EventCode": "0xC2", + "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.ALL_PARTS", + "FCMask": "0x4", + "PerPkg": "1", + "PortMask": "0x0f", + "PublicDescription": 
"PCIe Completion Buffer Inserts of completions with data: Part 0-3", + "UMask": "0x03", + "Unit": "IIO" + }, { "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0", "Counter": "0,1,2,3", @@ -585,6 +597,17 @@ "UMask": "0x03", "Unit": "IIO" }, + { + "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3", + "Counter": "2,3", + "EventCode": "0xD5", + "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.ALL_PARTS", + "FCMask": "0x04", + "PerPkg": "1", + "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3", + "UMask": "0x0f", + "Unit": "IIO" + }, { "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0", "Counter": "2,3", diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh new file mode 100755 index 0000000000000..6ffbb27afabac --- /dev/null +++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh @@ -0,0 +1,68 @@ +#!/bin/sh +# Check Arm64 callgraphs are complete in fp mode +# SPDX-License-Identifier: GPL-2.0 + +lscpu | grep -q "aarch64" || exit 2 + +if ! [ -x "$(command -v cc)" ]; then + echo "failed: no compiler, install gcc" + exit 2 +fi + +PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +TEST_PROGRAM_SOURCE=$(mktemp /tmp/test_program.XXXXX.c) +TEST_PROGRAM=$(mktemp /tmp/test_program.XXXXX) + +cleanup_files() +{ + rm -f $PERF_DATA + rm -f $TEST_PROGRAM_SOURCE + rm -f $TEST_PROGRAM +} + +trap cleanup_files exit term int + +cat << EOF > $TEST_PROGRAM_SOURCE +int a = 0; +void leaf(void) { + for (;;) + a += a; +} +void parent(void) { + leaf(); +} +int main(void) { + parent(); + return 0; +} +EOF + +echo " + Compiling test program ($TEST_PROGRAM)..." + +CFLAGS="-g -O0 -fno-inline -fno-omit-frame-pointer" +cc $CFLAGS $TEST_PROGRAM_SOURCE -o $TEST_PROGRAM || exit 1 + +# Add a 1 second delay to skip samples that are not in the leaf() function +perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 -- $TEST_PROGRAM 2> /dev/null & +PID=$! + +echo " + Recording (PID=$PID)..." +sleep 2 +echo " + Stopping perf-record..." + +kill $PID +wait $PID + +# expected perf-script output: +# +# program +# 728 leaf +# 753 parent +# 76c main +# ... 
+ +perf script -i $PERF_DATA -F comm,ip,sym | head -n4 +perf script -i $PERF_DATA -F comm,ip,sym | head -n4 | \ + awk '{ if ($2 != "") sym[i++] = $2 } END { if (sym[0] != "leaf" || + sym[1] != "parent" || + sym[2] != "main") exit 1 }' diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 1acdf2fc31c59..3299fb0977b27 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -25,7 +25,7 @@ cxl_pmem-y += config_check.o obj-m += cxl_core.o -cxl_core-y := $(CXL_CORE_SRC)/bus.o +cxl_core-y := $(CXL_CORE_SRC)/port.o cxl_core-y += $(CXL_CORE_SRC)/pmem.o cxl_core-y += $(CXL_CORE_SRC)/regs.o cxl_core-y += $(CXL_CORE_SRC)/memdev.o diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 736d99006fb7a..f0a410962af0d 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -511,7 +511,7 @@ static __init int cxl_test_init(void) for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) { struct platform_device *bridge = - cxl_host_bridge[i / NR_CXL_ROOT_PORTS]; + cxl_host_bridge[i % ARRAY_SIZE(cxl_host_bridge)]; struct platform_device *pdev; pdev = platform_device_alloc("cxl_root_port", i); diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c index d0f06e40c16d0..eac71fbb24ce2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -1,13 +1,24 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include "bind_perm.skel.h" - +#define _GNU_SOURCE +#include +#include #include #include #include +#include "test_progs.h" +#include "bind_perm.skel.h" + static int duration; +static int create_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return -1; + + return 0; +} + void try_bind(int family, int port, int expected_errno) { struct sockaddr_storage addr = {}; @@ -75,6 +86,9 @@ void test_bind_perm(void) struct bind_perm *skel; int cgroup_fd; + if (create_netns()) + return; + cgroup_fd = test__join_cgroup("/bind_perm"); if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) return; diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h new file mode 100644 index 0000000000000..5bb11fe595a43 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_MISC_H__ +#define __BPF_MISC_H__ + +#if defined(__TARGET_ARCH_x86) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__x64_" +#elif defined(__TARGET_ARCH_s390) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__s390x_" +#elif defined(__TARGET_ARCH_arm64) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__arm64_" +#else +#define SYSCALL_WRAPPER 0 +#define SYS_PREFIX "__se_" +#endif + +#endif diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 8812a90da4eb8..702578a5e496d 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -7,20 +7,7 @@ #include #include - -#if defined(__TARGET_ARCH_x86) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__x64_" -#elif defined(__TARGET_ARCH_s390) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__s390x_" -#elif defined(__TARGET_ARCH_arm64) -#define SYSCALL_WRAPPER 1 -#define SYS_PREFIX "__arm64_" -#else -#define SYSCALL_WRAPPER 0 -#define SYS_PREFIX "" -#endif +#include "bpf_misc.h" static struct sockaddr_in old; diff --git 
a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 81b57b9aaaeae..7967348b11af6 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -113,7 +113,7 @@ static void tpcpy(struct bpf_tcp_sock *dst, #define RET_LOG() ({ \ linum = __LINE__; \ - bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \ + bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY); \ return CG_OK; \ }) diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh index ec4e15948e406..5252b91f48a18 100755 --- a/tools/testing/selftests/bpf/test_lirc_mode2.sh +++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh @@ -3,6 +3,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +ret=$ksft_skip msg="skip all tests:" if [ $UID != 0 ]; then @@ -25,7 +26,7 @@ do fi done -if [ -n $LIRCDEV ]; +if [ -n "$LIRCDEV" ]; then TYPE=lirc_mode2 ./test_lirc_mode2_user $LIRCDEV $INPUTDEV @@ -36,3 +37,5 @@ then echo -e ${GREEN}"PASS: $TYPE"${NC} fi fi + +exit $ret diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index b497bb85b667f..6c69c42b1d607 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -120,6 +120,14 @@ setup() ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0 + # disable IPv6 DAD because it sometimes takes too long and fails tests + ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip link add veth1 type veth peer name veth2 ip link add veth3 type veth peer name veth4 ip link add veth5 type veth peer name veth6 @@ -289,7 +297,7 @@ test_ping() ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null RET=$? elif [ "${PROTO}" == "IPv6" ] ; then - ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null + ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null RET=$? else echo " test_ping: unknown PROTO: ${PROTO}" diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh index 05f8727409997..cc57cb87e65f6 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -32,6 +32,11 @@ DRV_MODE="xdpgeneric xdpdrv xdpegress" PASS=0 FAIL=0 LOG_DIR=$(mktemp -d) +declare -a NS +NS[0]="ns0-$(mktemp -u XXXXXX)" +NS[1]="ns1-$(mktemp -u XXXXXX)" +NS[2]="ns2-$(mktemp -u XXXXXX)" +NS[3]="ns3-$(mktemp -u XXXXXX)" test_pass() { @@ -47,11 +52,9 @@ test_fail() clean_up() { - for i in $(seq $NUM); do - ip link del veth$i 2> /dev/null - ip netns del ns$i 2> /dev/null + for i in $(seq 0 $NUM); do + ip netns del ${NS[$i]} 2> /dev/null done - ip netns del ns0 2> /dev/null } # Kselftest framework requirement - SKIP code is 4. 
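Reviewer aside (not part of the patch): the bind_perm and xdp_redirect_multi changes in this series attack the same flakiness from two sides — tests sharing global network state with the host or with a concurrent run. The former isolates itself with unshare(CLONE_NEWNET); the latter randomizes its netns names with mktemp -u. A self-contained sketch of the unshare() pattern follows; the port number is arbitrary and only safe to take because the namespace is private, and the program needs CAP_SYS_ADMIN (run as root, as these selftests already do).

/*
 * Sketch of the isolation pattern the bind_perm fix introduces: give the
 * test its own network namespace so ports and routes cannot collide with
 * the host or with a parallel test run.
 */
#define _GNU_SOURCE
#include <netinet/in.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr;
	int fd;

	if (unshare(CLONE_NEWNET)) {	/* requires CAP_SYS_ADMIN */
		perror("unshare(CLONE_NEWNET)");
		return 1;
	}

	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port = htons(80);	/* would clash system-wide without the netns */

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) {
		perror("bind");
		close(fd);
		return 1;
	}

	puts("bound port 80 inside a private network namespace");
	close(fd);
	return 0;
}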
@@ -79,23 +82,22 @@ setup_ns() mode="xdpdrv" fi - ip netns add ns0 + ip netns add ${NS[0]} for i in $(seq $NUM); do - ip netns add ns$i - ip -n ns$i link add veth0 index 2 type veth \ - peer name veth$i netns ns0 index $((1 + $i)) - ip -n ns0 link set veth$i up - ip -n ns$i link set veth0 up - - ip -n ns$i addr add 192.0.2.$i/24 dev veth0 - ip -n ns$i addr add 2001:db8::$i/64 dev veth0 + ip netns add ${NS[$i]} + ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]} + ip -n ${NS[$i]} link set veth0 up + ip -n ${NS[0]} link set veth$i up + + ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0 + ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0 # Add a neigh entry for IPv4 ping test - ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 - ip -n ns$i link set veth0 $mode obj \ + ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 + ip -n ${NS[$i]} link set veth0 $mode obj \ xdp_dummy.o sec xdp &> /dev/null || \ { test_fail "Unable to load dummy xdp" && exit 1; } IFACES="$IFACES veth$i" - veth_mac[$i]=$(ip -n ns0 link show veth$i | awk '/link\/ether/ {print $2}') + veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}') done } @@ -104,10 +106,10 @@ do_egress_tests() local mode=$1 # mac test - ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log & - ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log & + ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log & + ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log & sleep 0.5 - ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null + ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null sleep 0.5 pkill tcpdump @@ -123,18 +125,18 @@ do_ping_tests() local mode=$1 # ping6 test: echo request should be redirect back to itself, not others - ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 + ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 - ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log & - ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log & - ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & + ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log & + ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log & + ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & sleep 0.5 # ARP test - ip netns exec ns1 arping -q -c 2 -I veth0 192.0.2.254 + ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254 # IPv4 test - ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null + ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null # IPv6 test - ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null + ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null sleep 0.5 pkill tcpdump @@ -180,7 +182,7 @@ do_tests() xdpgeneric) drv_p="-S";; esac - ip netns exec ns0 ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & + ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & xdp_pid=$! sleep 1 if ! 
ps -p $xdp_pid > /dev/null; then @@ -197,10 +199,10 @@ do_tests() kill $xdp_pid } -trap clean_up EXIT - check_env +trap clean_up EXIT + for mode in ${DRV_MODE}; do setup_ns $mode do_tests $mode diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 0a5d23da486df..ffa5502ad95ed 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -906,7 +906,10 @@ static bool rx_stats_are_valid(struct ifobject *ifobject) return true; case STAT_TEST_RX_FULL: xsk_stat = stats.rx_ring_full; - expected_stat -= RX_FULL_RXQSIZE; + if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) + expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE; + else + expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE; break; case STAT_TEST_RX_FILL_EMPTY: xsk_stat = stats.rx_fill_ring_empty_descs; diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config index a26a3fa9e9255..8bd847f0463cd 100644 --- a/tools/testing/selftests/lkdtm/config +++ b/tools/testing/selftests/lkdtm/config @@ -6,6 +6,7 @@ CONFIG_HARDENED_USERCOPY=y # CONFIG_HARDENED_USERCOPY_FALLBACK is not set CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y +CONFIG_UBSAN=y CONFIG_UBSAN_BOUNDS=y CONFIG_UBSAN_TRAP=y CONFIG_STACKPROTECTOR_STRONG=y diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index 3dece8b292536..b57e91e1c3f28 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -218,10 +218,10 @@ main(int argc, char **argv) /* Test 1: * veriyf that SIGURG is - * delivered and 63 bytes are - * read and oob is '@' + * delivered, 63 bytes are + * read, oob is '@', and POLLPRI works. 
*/ - wait_for_data(pfd, POLLIN | POLLPRI); + wait_for_data(pfd, POLLPRI); read_oob(pfd, &oob); len = read_data(pfd, buf, 1024); if (!signal_recvd || len != 63 || oob != '@') { diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index f0f4ab96b8f3e..621af6895f4d5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -432,6 +432,8 @@ do_transfer() local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") + local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -524,6 +526,23 @@ do_transfer() fi fi + if $checksum; then + local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") + local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + + local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) + if [ $csum_err_s_nr -gt 0 ]; then + printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]" + rets=1 + fi + + local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) + if [ $csum_err_c_nr -gt 0 ]; then + printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]" + retc=1 + fi + fi + if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then printf "[ OK ]" fi diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh index ea5a7a808f120..1fd1250ebc667 100755 --- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -120,11 +120,11 @@ echo "[ OK ]" # Move the underlay to a non-default VRF ip -netns hv-1 link set veth0 vrf vrf-underlay -ip -netns hv-1 link set veth0 down -ip -netns hv-1 link set veth0 up +ip -netns hv-1 link set vxlan0 down +ip -netns hv-1 link set vxlan0 up ip -netns hv-2 link set veth0 vrf vrf-underlay -ip -netns hv-2 link set veth0 down -ip -netns hv-2 link set veth0 up +ip -netns hv-2 link set vxlan0 down +ip -netns hv-2 link set vxlan0 up echo -n "Check VM connectivity through VXLAN (underlay in a VRF) " ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c index aee631c5284eb..044bc0e9ed81a 100644 --- a/tools/testing/selftests/net/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c @@ -325,8 +325,8 @@ int main(int argc, char **argv) struct ifreq device; struct ifreq hwtstamp; struct hwtstamp_config hwconfig, hwconfig_requested; - struct so_timestamping so_timestamping_get = { 0, -1 }; - struct so_timestamping so_timestamping = { 0, -1 }; + struct so_timestamping so_timestamping_get = { 0, 0 }; + struct so_timestamping so_timestamping = { 0, 0 }; struct sockaddr_in addr; struct ip_mreq imr; struct in_addr iaddr; diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 6e468e0f42f78..5d70b04c482c9 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -683,6 +683,9 @@ TEST_F(tls, splice_cmsg_to_pipe) char buf[10]; int p[2]; + if (self->notls) + SKIP(return, "no TLS support"); + ASSERT_GE(pipe(p), 0); EXPECT_EQ(tls_send_cmsg(self->fd, 
100, test_str, send_len, 0), 10); EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1); @@ -703,6 +706,9 @@ TEST_F(tls, splice_dec_cmsg_to_pipe) char buf[10]; int p[2]; + if (self->notls) + SKIP(return, "no TLS support"); + ASSERT_GE(pipe(p), 0); EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index eae88aacca2aa..b2929fb15f7e6 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -71,8 +71,8 @@ usage () { echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\"" echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\"" echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\"" - echo " --doall" - echo " --doallmodconfig / --do-no-allmodconfig" + echo " --do-all" + echo " --do-allmodconfig / --do-no-allmodconfig" echo " --do-clocksourcewd / --do-no-clocksourcewd" echo " --do-kasan / --do-no-kasan" echo " --do-kcsan / --do-no-kcsan" diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 2956584e1e37f..75af864e07b65 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -4,7 +4,7 @@ include ../lib.mk .PHONY: all clean -CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \ +CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \ ../x86/trivial_64bit_program.c) ifndef OBJCOPY diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 9d4322c946e2b..006b464c8fc94 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -21,7 +21,7 @@ void encl_delete(struct encl *encl) { - struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + struct encl_segment *heap_seg; if (encl->encl_base) munmap((void *)encl->encl_base, encl->encl_size); @@ -32,10 +32,11 @@ void encl_delete(struct encl *encl) if (encl->fd) close(encl->fd); - munmap(heap_seg->src, heap_seg->size); - - if (encl->segment_tbl) + if (encl->segment_tbl) { + heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + munmap(heap_seg->src, heap_seg->size); free(encl->segment_tbl); + } memset(encl, 0, sizeof(*encl)); } diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 370c4995f7c4a..b0bd95a4730d5 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -147,6 +147,7 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, if (!encl_load("test_encl.elf", encl, heap_size)) { encl_delete(encl); TH_LOG("Failed to load the test enclave.\n"); + return false; } if (!encl_measure(encl)) @@ -185,8 +186,6 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, return true; err: - encl_delete(encl); - for (i = 0; i < encl->nr_segments; i++) { seg = &encl->segment_tbl[i]; @@ -207,6 +206,8 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, TH_LOG("Failed to initialize the test enclave.\n"); + encl_delete(encl); + return false; } diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index a14b5b8008970..1530c3e0242ef 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -51,9 +51,9 @@ TEST_GEN_FILES += split_huge_page_test TEST_GEN_FILES += ksm_tests ifeq 
($(MACHINE),x86_64) -CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c) -CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie) +CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c) +CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie) TARGETS := protection_keys BINARIES_32 := $(TARGETS:%=%_32) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 3fc1d2ee29485..c964bfe9fbcda 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -120,6 +120,9 @@ struct uffd_stats { ~(unsigned long)(sizeof(unsigned long long) \ - 1))) +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + const char *examples = "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" "./userfaultfd anon 100 99999\n\n" diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 8a1f62ab3c8e6..53df7d3893d31 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -6,9 +6,9 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean UNAME_M := $(shell uname -m) -CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) -CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) +CAN_BUILD_I386 := $(shell ./check_cc.sh "$(CC)" trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./check_cc.sh "$(CC)" trivial_64bit_program.c) +CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh index 3e2089c8cf549..8c669c0d662ee 100755 --- a/tools/testing/selftests/x86/check_cc.sh +++ b/tools/testing/selftests/x86/check_cc.sh @@ -7,7 +7,7 @@ CC="$1" TESTPROG="$2" shift 2 -if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then +if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then echo 1 else echo 0 diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 52c053cc1789d..e88f5c870141e 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -782,7 +782,7 @@ int osnoise_hist_main(int argc, char *argv[]) return_value = 0; if (!tracefs_trace_is_on(trace->inst)) { - printf("rtla timelat hit stop tracing\n"); + printf("rtla osnoise hit stop tracing\n"); if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); save_trace_to_file(record->trace.inst, params->trace_output); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0afc016cc54d4..246168310a754 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -117,6 +117,8 @@ EXPORT_SYMBOL_GPL(kvm_debugfs_dir); static const struct file_operations stat_fops_per_vm; +static struct file_operations kvm_chardev_ops; + static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); #ifdef CONFIG_KVM_COMPAT @@ -1137,6 
+1139,16 @@ static struct kvm *kvm_create_vm(unsigned long type) preempt_notifier_inc(); kvm_init_pm_notifier(kvm); + /* + * When the fd passed to this ioctl() is opened it pins the module, + * but try_module_get() also prevents getting a reference if the module + * is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait"). + */ + if (!try_module_get(kvm_chardev_ops.owner)) { + r = -ENODEV; + goto out_err; + } + return kvm; out_err: @@ -1226,6 +1238,7 @@ static void kvm_destroy_vm(struct kvm *kvm) preempt_notifier_dec(); hardware_disable_all(); mmdrop(mm); + module_put(kvm_chardev_ops.owner); } void kvm_get_kvm(struct kvm *kvm) diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index ce878f4be4daa..1621f8efd9616 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -191,6 +191,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn); if (kvm_is_error_hva(gpc->uhva)) { + gpc->pfn = KVM_PFN_ERR_FAULT; ret = -EFAULT; goto out; }
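Reviewer aside (not part of the patch): the pfncache hunk above closes a subtle hole — the old code returned -EFAULT for a bad hva but left the previously cached pfn in place, so a caller that missed the error could still consume the stale translation. Below is a userspace sketch of the fail-closed invariant the fix restores; every name and the translate() stub are invented for illustration.

/*
 * Illustrative userspace sketch (not kernel code): a refresh that fails
 * must leave the cache self-describing as invalid, or a later reader can
 * consume the translation cached by the previous, successful refresh.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PFN_ERR_FAULT ((uint64_t)-1)

struct pfn_cache {
	uint64_t gfn;
	uint64_t pfn;		/* PFN_ERR_FAULT means "do not use" */
};

/* Stand-in for the real gfn->pfn lookup; fails for out-of-range gfns. */
static uint64_t translate(uint64_t gfn)
{
	return gfn < 1024 ? 0x1000 + gfn : PFN_ERR_FAULT;
}

static int cache_refresh(struct pfn_cache *c, uint64_t gfn)
{
	uint64_t pfn = translate(gfn);

	c->gfn = gfn;
	if (pfn == PFN_ERR_FAULT) {
		/* The fix: poison the cached pfn instead of keeping the old one. */
		c->pfn = PFN_ERR_FAULT;
		return -1;
	}
	c->pfn = pfn;
	return 0;
}

static bool cache_valid(const struct pfn_cache *c)
{
	return c->pfn != PFN_ERR_FAULT;
}

int main(void)
{
	struct pfn_cache c = { .pfn = PFN_ERR_FAULT };

	assert(cache_refresh(&c, 1) == 0 && cache_valid(&c));
	/* A failed refresh must not leave the old pfn visible. */
	assert(cache_refresh(&c, 4096) != 0 && !cache_valid(&c));
	return 0;
}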